[openib-general] Re: [RFC] Kernel uverbs changes for PathScale merge

Roland Dreier rolandd at cisco.com
Thu Oct 13 15:03:39 PDT 2005


OK, here's a new patch that adds a mask of allowed userspace commands
set by the kernel low-level driver.

Thanks, good catch Michael...

 - R.

--- include/rdma/ib_user_verbs.h	(revision 3707)
+++ include/rdma/ib_user_verbs.h	(working copy)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -88,8 +89,11 @@ enum {
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ *  - Do not use pointer types -- pass pointers in __u64 instead.
+ *  - Make sure that any structure larger than 4 bytes is padded to a
+ *    multiple of 8 bytes.  Otherwise the structure size will be
+ *    different between 32-bit and 64-bit architectures.
  */
 
 struct ib_uverbs_async_event_desc {
@@ -261,6 +265,42 @@ struct ib_uverbs_create_cq_resp {
 	__u32 cqe;
 };
 
+struct ib_uverbs_poll_cq {
+	__u64 response;
+	__u32 cq_handle;
+	__u32 ne;
+	__u64 wc;
+};
+
+struct ib_uverbs_wc {
+	__u64 wr_id;
+	__u32 status;
+	__u32 opcode;
+	__u32 vendor_err;
+	__u32 byte_len;
+	__u32 imm_data;
+	__u32 qp_num;
+	__u32 src_qp;
+	__u32 wc_flags;
+	__u16 pkey_index;
+	__u16 slid;
+	__u8 sl;
+	__u8 dlid_path_bits;
+	__u8 port_num;
+	__u8 reserved;
+};
+
+struct ib_uverbs_poll_cq_resp {
+	__u32 count;
+	__u32 reserved;
+	struct ib_uverbs_wc wc[0];
+};
+
+struct ib_uverbs_req_notify_cq {
+	__u32 cq_handle;
+	__u32 solicited_only;
+};
+
 struct ib_uverbs_destroy_cq {
 	__u64 response;
 	__u32 cq_handle;
@@ -358,6 +398,127 @@ struct ib_uverbs_destroy_qp_resp {
 	__u32 events_reported;
 };
 
+/*
+ * Note: the ib_uverbs_sge structure isn't used anywhere, as the ib_sge
+ * structure is packed the same way on 32-bit and 64-bit architectures
+ * in both kernel and user space.  It's just here to document the ABI.
+ */
+
+struct ib_uverbs_sge {
+	__u64 addr;
+	__u32 length;
+	__u32 lkey;
+};
+
+struct ib_uverbs_send_wr {
+	__u64 wr_id; 
+	__u32 num_sge;
+	__u32 opcode;
+	__u32 send_flags;
+	__u32 imm_data;
+	union {
+		struct {
+			__u64 remote_addr;
+			__u32 rkey;
+			__u32 reserved;
+		} rdma;
+		struct {
+			__u64 remote_addr;
+			__u64 compare_add;
+			__u64 swap;
+			__u32 rkey;
+			__u32 reserved;
+		} atomic;
+		struct {
+			__u32 ah;
+			__u32 remote_qpn;
+			__u32 remote_qkey;
+			__u32 reserved;
+		} ud;
+	} wr;
+};
+
+struct ib_uverbs_post_send {
+	__u64 response;
+	__u32 qp_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ib_uverbs_send_wr send_wr[0];
+};
+
+struct ib_uverbs_post_send_resp {
+	__u32 bad_wr;
+};
+
+struct ib_uverbs_recv_wr {
+	__u64 wr_id;
+	__u32 num_sge;
+	__u32 reserved;
+};
+
+struct ib_uverbs_post_recv {
+	__u64 response;
+	__u32 qp_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ib_uverbs_recv_wr recv_wr[0];
+};
+
+struct ib_uverbs_post_recv_resp {
+	__u32 bad_wr;
+};
+
+struct ib_uverbs_post_srq_recv {
+	__u64 response;
+	__u32 srq_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ib_uverbs_recv_wr recv[0];
+};
+
+struct ib_uverbs_post_srq_recv_resp {
+	__u32 bad_wr;
+};
+
+struct ib_uverbs_global_route {
+	__u8  dgid[16];
+	__u32 flow_label;    
+	__u8  sgid_index;
+	__u8  hop_limit;
+	__u8  traffic_class;
+	__u8  reserved;
+};
+
+struct ib_uverbs_ah_attr {
+	struct ib_uverbs_global_route grh;
+	__u16 dlid;
+	__u8  sl;
+	__u8  src_path_bits;
+	__u8  static_rate;
+	__u8  is_global;
+	__u8  port_num;
+	__u8  reserved;
+};
+
+struct ib_uverbs_create_ah {
+	__u64 response;
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 reserved;
+	struct ib_uverbs_ah_attr attr;
+};
+
+struct ib_uverbs_create_ah_resp {
+	__u32 ah_handle;
+};
+
+struct ib_uverbs_destroy_ah {
+	__u32 ah_handle;
+};
+
 struct ib_uverbs_attach_mcast {
 	__u8  gid[16];
 	__u32 qp_handle;
--- include/rdma/ib_verbs.h	(revision 3707)
+++ include/rdma/ib_verbs.h	(working copy)
@@ -951,6 +951,7 @@ struct ib_device {
 		IB_DEV_UNREGISTERED
 	}                            reg_state;
 
+	u64			     uverbs_cmd_mask;
 	int			     uverbs_abi_ver;
 
 	u8                           node_type;
--- core/uverbs_main.c	(revision 3740)
+++ core/uverbs_main.c	(working copy)
@@ -3,6 +3,7 @@
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -86,10 +87,17 @@ static ssize_t (*uverbs_cmd_table[])(str
 	[IB_USER_VERBS_CMD_DEREG_MR]      	= ib_uverbs_dereg_mr,
 	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
 	[IB_USER_VERBS_CMD_CREATE_CQ]     	= ib_uverbs_create_cq,
+	[IB_USER_VERBS_CMD_POLL_CQ]     	= ib_uverbs_poll_cq,
+	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]     	= ib_uverbs_req_notify_cq,
 	[IB_USER_VERBS_CMD_DESTROY_CQ]    	= ib_uverbs_destroy_cq,
 	[IB_USER_VERBS_CMD_CREATE_QP]     	= ib_uverbs_create_qp,
 	[IB_USER_VERBS_CMD_MODIFY_QP]     	= ib_uverbs_modify_qp,
 	[IB_USER_VERBS_CMD_DESTROY_QP]    	= ib_uverbs_destroy_qp,
+	[IB_USER_VERBS_CMD_POST_SEND]    	= ib_uverbs_post_send,
+	[IB_USER_VERBS_CMD_POST_RECV]    	= ib_uverbs_post_recv,
+	[IB_USER_VERBS_CMD_POST_SRQ_RECV]    	= ib_uverbs_post_srq_recv,
+	[IB_USER_VERBS_CMD_CREATE_AH]    	= ib_uverbs_create_ah,
+	[IB_USER_VERBS_CMD_DESTROY_AH]    	= ib_uverbs_destroy_ah,
 	[IB_USER_VERBS_CMD_ATTACH_MCAST]  	= ib_uverbs_attach_mcast,
 	[IB_USER_VERBS_CMD_DETACH_MCAST]  	= ib_uverbs_detach_mcast,
 	[IB_USER_VERBS_CMD_CREATE_SRQ]    	= ib_uverbs_create_srq,
@@ -111,7 +119,13 @@ static int ib_dealloc_ucontext(struct ib
 
 	down(&ib_uverbs_idr_mutex);
 
-	/* XXX Free AHs */
+	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
+		struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id);
+		idr_remove(&ib_uverbs_ah_idr, uobj->id);
+		ib_destroy_ah(ah);
+		list_del(&uobj->list);
+		kfree(uobj);
+	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
 		struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
@@ -514,7 +528,8 @@ static ssize_t ib_uverbs_write(struct fi
 
 	if (hdr.command < 0 				||
 	    hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
-	    !uverbs_cmd_table[hdr.command])
+	    !uverbs_cmd_table[hdr.command]		||
+	    !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
 		return -EINVAL;
 
 	if (!file->ucontext &&
--- core/uverbs.h	(revision 3707)
+++ core/uverbs.h	(working copy)
@@ -3,6 +3,7 @@
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -140,10 +141,17 @@ IB_UVERBS_DECLARE_CMD(reg_mr);
 IB_UVERBS_DECLARE_CMD(dereg_mr);
 IB_UVERBS_DECLARE_CMD(create_comp_channel);
 IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(poll_cq);
+IB_UVERBS_DECLARE_CMD(req_notify_cq);
 IB_UVERBS_DECLARE_CMD(destroy_cq);
 IB_UVERBS_DECLARE_CMD(create_qp);
 IB_UVERBS_DECLARE_CMD(modify_qp);
 IB_UVERBS_DECLARE_CMD(destroy_qp);
+IB_UVERBS_DECLARE_CMD(post_send);
+IB_UVERBS_DECLARE_CMD(post_recv);
+IB_UVERBS_DECLARE_CMD(post_srq_recv);
+IB_UVERBS_DECLARE_CMD(create_ah);
+IB_UVERBS_DECLARE_CMD(destroy_ah);
 IB_UVERBS_DECLARE_CMD(attach_mcast);
 IB_UVERBS_DECLARE_CMD(detach_mcast);
 IB_UVERBS_DECLARE_CMD(create_srq);
--- core/uverbs_cmd.c	(revision 3707)
+++ core/uverbs_cmd.c	(working copy)
@@ -665,6 +665,93 @@ err:
 	return ret;
 }
 
+ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
+			  const char __user *buf, int in_len,
+			  int out_len)
+{
+	struct ib_uverbs_poll_cq       cmd;
+	struct ib_uverbs_poll_cq_resp *resp;
+	struct ib_cq                  *cq;
+	struct ib_wc                  *wc;
+	int                            ret = 0;
+	int                            i;
+	int                            rsize;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
+	if (!wc)
+		return -ENOMEM;
+
+	rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
+	resp = kmalloc(rsize, GFP_KERNEL);
+	if (!resp) {
+		ret = -ENOMEM;
+		goto out_wc;
+	}
+
+	down(&ib_uverbs_idr_mutex);
+	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+	if (!cq || cq->uobject->context != file->ucontext) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	resp->count = ib_poll_cq(cq, cmd.ne, wc);
+
+	for (i = 0; i < resp->count; i++) {
+		resp->wc[i].wr_id 	   = wc[i].wr_id;
+		resp->wc[i].status 	   = wc[i].status;
+		resp->wc[i].opcode 	   = wc[i].opcode;
+		resp->wc[i].vendor_err 	   = wc[i].vendor_err;
+		resp->wc[i].byte_len 	   = wc[i].byte_len;
+		resp->wc[i].imm_data 	   = wc[i].imm_data;
+		resp->wc[i].qp_num 	   = wc[i].qp_num;
+		resp->wc[i].src_qp 	   = wc[i].src_qp;
+		resp->wc[i].wc_flags 	   = wc[i].wc_flags;
+		resp->wc[i].pkey_index 	   = wc[i].pkey_index;
+		resp->wc[i].slid 	   = wc[i].slid;
+		resp->wc[i].sl 		   = wc[i].sl;
+		resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
+		resp->wc[i].port_num 	   = wc[i].port_num;
+	}
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
+		ret = -EFAULT;
+
+out:
+	up(&ib_uverbs_idr_mutex);
+	kfree(resp);
+
+out_wc:
+	kfree(wc);
+	return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
+				const char __user *buf, int in_len,
+				int out_len)
+{
+	struct ib_uverbs_req_notify_cq cmd;
+	struct ib_cq                  *cq;
+	int                            ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	down(&ib_uverbs_idr_mutex);
+	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+	if (cq && cq->uobject->context == file->ucontext) {
+		ib_req_notify_cq(cq, cmd.solicited_only ?
+					IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
+		ret = in_len;
+	}
+	up(&ib_uverbs_idr_mutex);
+
+	return ret;
+}
+
 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 			     const char __user *buf, int in_len,
 			     int out_len)
@@ -1003,6 +1090,468 @@ out:
 	return ret ? ret : in_len;
 }
 
+ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+	struct ib_uverbs_post_send      cmd;
+	struct ib_uverbs_post_send_resp resp;
+	struct ib_uverbs_send_wr       *user_wr;
+	struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
+	struct ib_qp                   *qp;
+	int                             i, sg_ind;
+	ssize_t                         ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
+	    cmd.sge_count * sizeof (struct ib_uverbs_sge))
+		return -EINVAL;
+
+	if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
+		return -EINVAL;
+
+	user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
+	if (!user_wr)
+		return -ENOMEM;
+
+	down(&ib_uverbs_idr_mutex);
+
+	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+	if (!qp || qp->uobject->context != file->ucontext)
+		goto out;
+
+	sg_ind = 0;
+	last = NULL;
+	for (i = 0; i < cmd.wr_count; ++i) {
+		if (copy_from_user(user_wr,
+				   buf + sizeof cmd + i * cmd.wqe_size,
+				   cmd.wqe_size)) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		if (user_wr->num_sge + sg_ind > cmd.sge_count) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+			       user_wr->num_sge * sizeof (struct ib_sge),
+			       GFP_KERNEL);
+		if (!next) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		if (!last)
+			wr = next;
+		else
+			last->next = next;
+		last = next;
+
+		next->next       = NULL;
+		next->wr_id      = user_wr->wr_id;
+		next->num_sge    = user_wr->num_sge;
+		next->opcode     = user_wr->opcode;
+		next->send_flags = user_wr->send_flags;
+		next->imm_data   = user_wr->imm_data;
+
+		if (qp->qp_type == IB_QPT_UD) {
+			next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
+						  user_wr->wr.ud.ah);
+			if (!next->wr.ud.ah) {
+				ret = -EINVAL;
+				goto out;
+			}
+			next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;
+			next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+		} else {
+			switch (next->opcode) {
+			case IB_WR_RDMA_WRITE:
+			case IB_WR_RDMA_WRITE_WITH_IMM:
+			case IB_WR_RDMA_READ:
+				next->wr.rdma.remote_addr =
+					user_wr->wr.rdma.remote_addr;
+				next->wr.rdma.rkey        =
+					user_wr->wr.rdma.rkey;
+				break;
+			case IB_WR_ATOMIC_CMP_AND_SWP:
+			case IB_WR_ATOMIC_FETCH_AND_ADD:
+				next->wr.atomic.remote_addr =
+					user_wr->wr.atomic.remote_addr;
+				next->wr.atomic.compare_add =
+					user_wr->wr.atomic.compare_add;
+				next->wr.atomic.swap = user_wr->wr.atomic.swap;
+				next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (next->num_sge) {
+			next->sg_list = (void *) next +
+				ALIGN(sizeof *next, sizeof (struct ib_sge));
+			if (copy_from_user(next->sg_list,
+					   buf + sizeof cmd +
+					   cmd.wr_count * cmd.wqe_size +
+					   sg_ind * sizeof (struct ib_sge),
+					   next->num_sge * sizeof (struct ib_sge))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			sg_ind += next->num_sge;
+		} else
+			next->sg_list = NULL;
+	}
+
+	resp.bad_wr = 0;
+	ret = qp->device->post_send(qp, wr, &bad_wr);
+	if (ret)
+		for (next = wr; next; next = next->next) {
+			++resp.bad_wr;
+			if (next == bad_wr)
+				break;
+		}
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	up(&ib_uverbs_idr_mutex);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	kfree(user_wr);
+
+	return ret ? ret : in_len;
+}
+
+static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
+						    int in_len,
+						    u32 wr_count,
+						    u32 sge_count,
+						    u32 wqe_size)
+{
+	struct ib_uverbs_recv_wr *user_wr;
+	struct ib_recv_wr        *wr = NULL, *last, *next;
+	int                       sg_ind;
+	int                       i;
+	int                       ret;
+
+	if (in_len < wqe_size * wr_count +
+	    sge_count * sizeof (struct ib_uverbs_sge))
+		return ERR_PTR(-EINVAL);
+
+	if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
+		return ERR_PTR(-EINVAL);
+
+	user_wr = kmalloc(wqe_size, GFP_KERNEL);
+	if (!user_wr)
+		return ERR_PTR(-ENOMEM);
+
+	sg_ind = 0;
+	last = NULL;
+	for (i = 0; i < wr_count; ++i) {
+		if (copy_from_user(user_wr, buf + i * wqe_size,
+				   wqe_size)) {
+			ret = -EFAULT;
+			goto err;
+		}
+
+		if (user_wr->num_sge + sg_ind > sge_count) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+			       user_wr->num_sge * sizeof (struct ib_sge),
+			       GFP_KERNEL);
+		if (!next) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		if (!last)
+			wr = next;
+		else
+			last->next = next;
+		last = next;
+
+		next->next       = NULL;
+		next->wr_id      = user_wr->wr_id;
+		next->num_sge    = user_wr->num_sge;
+
+		if (next->num_sge) {
+			next->sg_list = (void *) next +
+				ALIGN(sizeof *next, sizeof (struct ib_sge));
+			if (copy_from_user(next->sg_list,
+					   buf + wr_count * wqe_size +
+					   sg_ind * sizeof (struct ib_sge),
+					   next->num_sge * sizeof (struct ib_sge))) {
+				ret = -EFAULT;
+				goto err;
+			}
+			sg_ind += next->num_sge;
+		} else
+			next->sg_list = NULL;
+	}
+
+	kfree(user_wr);
+	return wr;
+
+err:
+	kfree(user_wr);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	return ERR_PTR(ret);
+}
+
+ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+	struct ib_uverbs_post_recv      cmd;
+	struct ib_uverbs_post_recv_resp resp;
+	struct ib_recv_wr              *wr, *next, *bad_wr;
+	struct ib_qp                   *qp;
+	ssize_t                         ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+				       in_len - sizeof cmd, cmd.wr_count,
+				       cmd.sge_count, cmd.wqe_size);
+	if (IS_ERR(wr))
+		return PTR_ERR(wr);
+
+	down(&ib_uverbs_idr_mutex);
+
+	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+	if (!qp || qp->uobject->context != file->ucontext)
+		goto out;
+
+	resp.bad_wr = 0;
+	ret = qp->device->post_recv(qp, wr, &bad_wr);
+	if (ret)
+		for (next = wr; next; next = next->next) {
+			++resp.bad_wr;
+			if (next == bad_wr)
+				break;
+		}
+
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	up(&ib_uverbs_idr_mutex);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+	struct ib_uverbs_post_srq_recv      cmd;
+	struct ib_uverbs_post_srq_recv_resp resp;
+	struct ib_recv_wr                  *wr, *next, *bad_wr;
+	struct ib_srq                      *srq;
+	ssize_t                             ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+				       in_len - sizeof cmd, cmd.wr_count,
+				       cmd.sge_count, cmd.wqe_size);
+	if (IS_ERR(wr))
+		return PTR_ERR(wr);
+
+	down(&ib_uverbs_idr_mutex);
+
+	srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
+	if (!srq || srq->uobject->context != file->ucontext)
+		goto out;
+
+	resp.bad_wr = 0;
+	ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
+	if (ret)
+		for (next = wr; next; next = next->next) {
+			++resp.bad_wr;
+			if (next == bad_wr)
+				break;
+		}
+
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	up(&ib_uverbs_idr_mutex);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_create_ah	 cmd;
+	struct ib_uverbs_create_ah_resp	 resp;
+	struct ib_uobject		*uobj;
+	struct ib_pd			*pd;
+	struct ib_ah			*ah;
+	struct ib_ah_attr		attr;
+	int ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+	if (!uobj)
+		return -ENOMEM;
+
+	down(&ib_uverbs_idr_mutex);
+
+	pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+	if (!pd || pd->uobject->context != file->ucontext) {
+		ret = -EINVAL;
+		goto err_up;
+	}
+
+	uobj->user_handle = cmd.user_handle;
+	uobj->context     = file->ucontext;
+
+	attr.dlid 	       = cmd.attr.dlid;
+	attr.sl 	       = cmd.attr.sl;
+	attr.src_path_bits     = cmd.attr.src_path_bits;
+	attr.static_rate       = cmd.attr.static_rate;
+	attr.port_num 	       = cmd.attr.port_num;
+	attr.grh.flow_label    = cmd.attr.grh.flow_label;
+	attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
+	attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
+	attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+	memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+
+	ah = ib_create_ah(pd, &attr);
+	if (IS_ERR(ah)) {
+		ret = PTR_ERR(ah);
+		goto err_up;
+	}
+
+	ah->uobject = uobj;
+
+retry:
+	if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto err_destroy;
+	}
+
+	ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id);
+
+	if (ret == -EAGAIN)
+		goto retry;
+	if (ret)
+		goto err_destroy;
+
+	resp.ah_handle = uobj->id;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto err_idr;
+	}
+
+	down(&file->mutex);
+	list_add_tail(&uobj->list, &file->ucontext->ah_list);
+	up(&file->mutex);
+
+	up(&ib_uverbs_idr_mutex);
+
+	return in_len;
+
+err_idr:
+	idr_remove(&ib_uverbs_ah_idr, uobj->id);
+
+err_destroy:
+	ib_destroy_ah(ah);
+
+err_up:
+	up(&ib_uverbs_idr_mutex);
+
+	kfree(uobj);
+	return ret;
+}
+
+ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
+			     const char __user *buf, int in_len, int out_len)
+{
+	struct ib_uverbs_destroy_ah cmd;
+	struct ib_ah		   *ah;
+	struct ib_uobject	   *uobj;
+	int			    ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	down(&ib_uverbs_idr_mutex);
+
+	ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle);
+	if (!ah || ah->uobject->context != file->ucontext)
+		goto out;
+
+	uobj = ah->uobject;
+
+	ret = ib_destroy_ah(ah);
+	if (ret)
+		goto out;
+
+	idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle);
+
+	down(&file->mutex);
+	list_del(&uobj->list);
+	up(&file->mutex);
+
+	kfree(uobj);
+
+out:
+	up(&ib_uverbs_idr_mutex);
+
+	return ret ? ret : in_len;
+}
+
 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
 			       const char __user *buf, int in_len,
 			       int out_len)
--- hw/mthca/mthca_provider.c	(revision 3710)
+++ hw/mthca/mthca_provider.c	(working copy)
@@ -37,6 +37,7 @@
  */
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
 #include <linux/mm.h>
 
 #include "mthca_dev.h"
@@ -1077,6 +1078,25 @@ int mthca_register_device(struct mthca_d
 	dev->ib_dev.owner                = THIS_MODULE;
 
 	dev->ib_dev.uverbs_abi_ver	 = MTHCA_UVERBS_ABI_VERSION;
+	dev->ib_dev.uverbs_cmd_mask	 =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 	dev->ib_dev.node_type            = IB_NODE_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 	dev->ib_dev.dma_device           = &dev->pdev->dev;



More information about the general mailing list