[openib-general] [PATCH 05/16] IB uverbs: core implementation
Roland Dreier
rolandd at cisco.com
Tue Jun 28 16:03:43 PDT 2005
Add the core of the InfiniBand userspace verbs implementation,
including creating character device nodes, dispatching requests from
userspace, and passing event notifications back up to userspace.
Signed-off-by: Roland Dreier <rolandd at cisco.com>
---
drivers/infiniband/core/uverbs.h | 132 ++++
drivers/infiniband/core/uverbs_cmd.c | 1006 ++++++++++++++++++++++++++++++++++
drivers/infiniband/core/uverbs_main.c | 708 +++++++++++++++++++++++
3 files changed, 1846 insertions(+)
--- /dev/null 2005-06-23 14:14:38.423479552 -0700
+++ linux/drivers/infiniband/core/uverbs.h 2005-06-28 15:20:04.361964423 -0700
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $
+ */
+
+#ifndef UVERBS_H
+#define UVERBS_H
+
+/* Include device.h and fs.h until cdev.h is self-sufficient */
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/kref.h>
+#include <linux/idr.h>
+
+#include <ib_verbs.h>
+#include <ib_user_verbs.h>
+
+struct ib_uverbs_device { /* state for one IB device exposed through uverbs */
+ int devnum; /* presumably this device's index among uverbs devices -- confirm in uverbs_main.c */
+ struct cdev dev;
+ struct class_device class_dev;
+ struct ib_device *ib_dev; /* underlying IB device whose driver methods the command handlers call */
+ int num_comp; /* count of completion event files; bounds comp_file[] use and is reported as num_cq_events */
+};
+
+struct ib_uverbs_event_file { /* one event channel (async or completion) embedded in an ib_uverbs_file */
+ struct kref ref;
+ struct ib_uverbs_file *uverbs_file; /* presumably a back-pointer to the containing uverbs file -- confirm */
+ spinlock_t lock; /* presumably guards event_list -- confirm in uverbs_main.c */
+ int fd; /* descriptor number that get_context reports back to userspace */
+ int is_async; /* presumably nonzero for the async channel, zero for a completion channel -- confirm */
+ wait_queue_head_t poll_wait;
+ struct list_head event_list; /* presumably the queue of pending event entries -- confirm */
+};
+
+struct ib_uverbs_file { /* state handed to every ib_uverbs_<cmd>() handler as "file" */
+ struct kref ref;
+ struct ib_uverbs_device *device; /* device this file was opened on; supplies ib_dev and num_comp */
+ struct ib_ucontext *ucontext; /* set by get_context, which resets it to NULL when setup fails */
+ struct ib_event_handler event_handler;
+ struct ib_uverbs_event_file async_file; /* its fd is returned as resp.async_fd by get_context */
+ struct ib_uverbs_event_file comp_file[1]; /* indexed 0..device->num_comp-1 by get_context; allocation presumably sized to match -- confirm */
+};
+
+struct ib_uverbs_async_event { /* list node wrapping one async event descriptor */
+ struct ib_uverbs_async_event_desc desc; /* descriptor type comes from ib_user_verbs.h */
+ struct list_head list; /* presumably links into ib_uverbs_event_file.event_list -- confirm */
+};
+
+struct ib_uverbs_comp_event { /* list node wrapping one completion event descriptor */
+ struct ib_uverbs_comp_event_desc desc; /* descriptor type comes from ib_user_verbs.h */
+ struct list_head list; /* presumably links into ib_uverbs_event_file.event_list -- confirm */
+};
+
+struct ib_uobject_mr { /* NOTE(review): uverbs_cmd.c tracks MRs with struct ib_umem_object, not this type -- confirm whether this is still used */
+ struct ib_uobject uobj;
+ struct page *page_list;
+ struct scatterlist *sg_list;
+};
+
+extern struct semaphore ib_uverbs_idr_mutex; /* taken with down()/up() around every idr lookup, insert and remove in uverbs_cmd.c */
+extern struct idr ib_uverbs_pd_idr; /* maps userspace pd_handle values to struct ib_pd pointers */
+extern struct idr ib_uverbs_mr_idr; /* maps mr_handle values to struct ib_mr pointers */
+extern struct idr ib_uverbs_mw_idr;
+extern struct idr ib_uverbs_ah_idr;
+extern struct idr ib_uverbs_cq_idr; /* maps cq_handle values to struct ib_cq pointers */
+extern struct idr ib_uverbs_qp_idr; /* maps qp_handle values to struct ib_qp pointers */
+
+void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); /* installed as cq->comp_handler by create_cq */
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); /* installed as cq->event_handler by create_cq */
+void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); /* installed as the QP event handler by create_qp */
+
+int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, /* used by reg_mr to set up the umem for a user address range */
+ void *addr, size_t size, int write);
+void ib_umem_release(struct ib_device *dev, struct ib_umem *umem); /* used by reg_mr (on failure) and dereg_mr */
+void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
+
+#define IB_UVERBS_DECLARE_CMD(name) \
+ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
+ const char __user *buf, int in_len, \
+ int out_len)
+
+IB_UVERBS_DECLARE_CMD(query_params); /* each line expands to the prototype of ib_uverbs_<name>(file, buf, in_len, out_len) */
+IB_UVERBS_DECLARE_CMD(get_context);
+IB_UVERBS_DECLARE_CMD(query_device);
+IB_UVERBS_DECLARE_CMD(query_port);
+IB_UVERBS_DECLARE_CMD(query_gid);
+IB_UVERBS_DECLARE_CMD(query_pkey);
+IB_UVERBS_DECLARE_CMD(alloc_pd);
+IB_UVERBS_DECLARE_CMD(dealloc_pd);
+IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(destroy_cq);
+IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(modify_qp);
+IB_UVERBS_DECLARE_CMD(destroy_qp);
+IB_UVERBS_DECLARE_CMD(attach_mcast);
+IB_UVERBS_DECLARE_CMD(detach_mcast);
+
+#endif /* UVERBS_H */
--- /dev/null 2005-06-23 14:14:38.423479552 -0700
+++ linux/drivers/infiniband/core/uverbs_cmd.c 2005-06-28 15:20:04.365963559 -0700
@@ -0,0 +1,1006 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
+ */
+
+#include <asm/uaccess.h>
+
+#include "uverbs.h"
+
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (void __user *) (ibuf); \
+ (udata)->outbuf = (void __user *) (obuf); \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0) /* fills an ib_udata with user in/out buffer pointers and lengths; callers pass the space after the fixed cmd/resp structs */
+
+ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{ /* reports the device's num_comp to userspace as num_cq_events; returns in_len on success */
+ struct ib_uverbs_query_params cmd;
+ struct ib_uverbs_query_params_resp resp;
+
+ if (out_len < sizeof resp) /* response buffer must be able to hold the whole resp */
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp); /* zero first so fields not set below are copied out as 0 */
+
+ resp.num_cq_events = file->device->num_comp;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) /* cmd.response holds the user address of the response buffer */
+ return -EFAULT;
+
+ return in_len;
+}
+
+/*
+ * GET_CONTEXT: have the device driver allocate a userspace context for
+ * this file, initialize the context's object lists and lock, and
+ * report the event file descriptors to userspace.
+ *
+ * The async event fd is returned in the response structure; the
+ * completion event fds (one per device->num_comp) are written to the
+ * user array at cmd.cq_fd_tab.  Data following the fixed-size command
+ * and response structures is handed to the driver through udata.
+ *
+ * Returns in_len on success, -ENOSPC if out_len cannot hold the
+ * response, -EINVAL if the file already has a context, -EFAULT if a
+ * user copy fails, or the error encoded by alloc_ucontext().
+ */
+ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
+			      const char __user *buf,
+			      int in_len, int out_len)
+{
+	struct ib_uverbs_get_context cmd;
+	struct ib_uverbs_get_context_resp resp;
+	struct ib_udata udata;
+	struct ib_device *ibdev = file->device->ib_dev;
+	int i;
+	int ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	/*
+	 * Refuse a second GET_CONTEXT on the same file: overwriting
+	 * file->ucontext would leak the existing context together with
+	 * every object still linked on its lists.  This relies on
+	 * file->ucontext being NULL while no context exists, which the
+	 * error paths below maintain.
+	 */
+	if (file->ucontext)
+		return -EINVAL;
+
+	INIT_UDATA(&udata, buf + sizeof cmd,
+		   (unsigned long) cmd.response + sizeof resp,
+		   in_len - sizeof cmd, out_len - sizeof resp);
+
+	file->ucontext = ibdev->alloc_ucontext(ibdev, &udata);
+	if (IS_ERR(file->ucontext)) {
+		ret = PTR_ERR(file->ucontext);
+		file->ucontext = NULL;
+		return ret;
+	}
+
+	file->ucontext->device = ibdev;
+	INIT_LIST_HEAD(&file->ucontext->pd_list);
+	INIT_LIST_HEAD(&file->ucontext->mr_list);
+	INIT_LIST_HEAD(&file->ucontext->mw_list);
+	INIT_LIST_HEAD(&file->ucontext->cq_list);
+	INIT_LIST_HEAD(&file->ucontext->qp_list);
+	INIT_LIST_HEAD(&file->ucontext->srq_list);
+	INIT_LIST_HEAD(&file->ucontext->ah_list);
+	spin_lock_init(&file->ucontext->lock);
+
+	/*
+	 * Zero the response before filling it in, as every other handler
+	 * does, so that no uninitialized kernel stack is copied out.
+	 */
+	memset(&resp, 0, sizeof resp);
+	resp.async_fd = file->async_file.fd;
+
+	/* One __u32 fd per completion event file, written to the user table. */
+	for (i = 0; i < file->device->num_comp; ++i)
+		if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab +
+				 i * sizeof (__u32),
+				 &file->comp_file[i].fd, sizeof (__u32)))
+			goto err;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		goto err;
+
+	return in_len;
+
+err:
+	/* Only user copies jump here, so the error is always -EFAULT. */
+	ibdev->dealloc_ucontext(file->ucontext);
+	file->ucontext = NULL;
+
+	return -EFAULT;
+}
+
+ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{ /* calls ib_query_device() and copies the attributes field by field into the userspace response */
+ struct ib_uverbs_query_device cmd;
+ struct ib_uverbs_query_device_resp resp;
+ struct ib_device_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp) /* response buffer must be able to hold the whole resp */
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = ib_query_device(file->device->ib_dev, &attr);
+ if (ret) /* query errors are returned to the caller unchanged */
+ return ret;
+
+ memset(&resp, 0, sizeof resp); /* zero first so fields not set below are copied out as 0 */
+
+ resp.fw_ver = attr.fw_ver;
+ resp.node_guid = attr.node_guid;
+ resp.sys_image_guid = attr.sys_image_guid;
+ resp.max_mr_size = attr.max_mr_size;
+ resp.page_size_cap = attr.page_size_cap;
+ resp.vendor_id = attr.vendor_id;
+ resp.vendor_part_id = attr.vendor_part_id;
+ resp.hw_ver = attr.hw_ver;
+ resp.max_qp = attr.max_qp;
+ resp.max_qp_wr = attr.max_qp_wr;
+ resp.device_cap_flags = attr.device_cap_flags;
+ resp.max_sge = attr.max_sge;
+ resp.max_sge_rd = attr.max_sge_rd;
+ resp.max_cq = attr.max_cq;
+ resp.max_cqe = attr.max_cqe;
+ resp.max_mr = attr.max_mr;
+ resp.max_pd = attr.max_pd;
+ resp.max_qp_rd_atom = attr.max_qp_rd_atom;
+ resp.max_ee_rd_atom = attr.max_ee_rd_atom;
+ resp.max_res_rd_atom = attr.max_res_rd_atom;
+ resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom;
+ resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom;
+ resp.atomic_cap = attr.atomic_cap;
+ resp.max_ee = attr.max_ee;
+ resp.max_rdd = attr.max_rdd;
+ resp.max_mw = attr.max_mw;
+ resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp;
+ resp.max_raw_ethy_qp = attr.max_raw_ethy_qp;
+ resp.max_mcast_grp = attr.max_mcast_grp;
+ resp.max_mcast_qp_attach = attr.max_mcast_qp_attach;
+ resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
+ resp.max_ah = attr.max_ah;
+ resp.max_fmr = attr.max_fmr;
+ resp.max_map_per_fmr = attr.max_map_per_fmr;
+ resp.max_srq = attr.max_srq;
+ resp.max_srq_wr = attr.max_srq_wr;
+ resp.max_srq_sge = attr.max_srq_sge;
+ resp.max_pkeys = attr.max_pkeys;
+ resp.local_ca_ack_delay = attr.local_ca_ack_delay;
+ resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt; /* read from the ib_device itself, not from the queried attr */
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len; /* success is reported as the number of command bytes consumed */
+}
+
+ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{ /* calls ib_query_port() for cmd.port_num and copies the attributes into the userspace response */
+ struct ib_uverbs_query_port cmd;
+ struct ib_uverbs_query_port_resp resp;
+ struct ib_port_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp) /* response buffer must be able to hold the whole resp */
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr); /* cmd.port_num is passed through unchecked; validation is left to ib_query_port */
+ if (ret)
+ return ret;
+
+ memset(&resp, 0, sizeof resp); /* zero first so fields not set below are copied out as 0 */
+
+ resp.state = attr.state;
+ resp.max_mtu = attr.max_mtu;
+ resp.active_mtu = attr.active_mtu;
+ resp.gid_tbl_len = attr.gid_tbl_len;
+ resp.port_cap_flags = attr.port_cap_flags;
+ resp.max_msg_sz = attr.max_msg_sz;
+ resp.bad_pkey_cntr = attr.bad_pkey_cntr;
+ resp.qkey_viol_cntr = attr.qkey_viol_cntr;
+ resp.pkey_tbl_len = attr.pkey_tbl_len;
+ resp.lid = attr.lid;
+ resp.sm_lid = attr.sm_lid;
+ resp.lmc = attr.lmc;
+ resp.max_vl_num = attr.max_vl_num;
+ resp.sm_sl = attr.sm_sl;
+ resp.subnet_timeout = attr.subnet_timeout;
+ resp.init_type_reply = attr.init_type_reply;
+ resp.active_width = attr.active_width;
+ resp.active_speed = attr.active_speed;
+ resp.phys_state = attr.phys_state;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len; /* success is reported as the number of command bytes consumed */
+}
+
+ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{ /* looks up the GID at (cmd.port_num, cmd.index) and copies it to the userspace response */
+ struct ib_uverbs_query_gid cmd;
+ struct ib_uverbs_query_gid_resp resp;
+ int ret;
+
+ if (out_len < sizeof resp) /* response buffer must be able to hold the whole resp */
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp); /* zero first so fields not written by the query are copied out as 0 */
+
+ ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
+ (union ib_gid *) resp.gid); /* result is written straight into resp.gid, viewed as a union ib_gid */
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len; /* success is reported as the number of command bytes consumed */
+}
+
+ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{ /* looks up the P_Key at (cmd.port_num, cmd.index) and copies it to the userspace response */
+ struct ib_uverbs_query_pkey cmd;
+ struct ib_uverbs_query_pkey_resp resp;
+ int ret;
+
+ if (out_len < sizeof resp) /* response buffer must be able to hold the whole resp */
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp); /* zero first so fields not written by the query are copied out as 0 */
+
+ ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
+ &resp.pkey); /* result is written straight into resp.pkey */
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len; /* success is reported as the number of command bytes consumed */
+}
+
+ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{ /* allocates a PD through the driver, gives it an idr handle, and returns that handle in resp.pd_handle */
+ struct ib_uverbs_alloc_pd cmd;
+ struct ib_uverbs_alloc_pd_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ int ret;
+
+ if (out_len < sizeof resp) /* response buffer must be able to hold the whole resp */
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd, /* the driver sees only the bytes after the fixed cmd and resp structures */
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL); /* bookkeeping object: carries the handle id, list linkage and owning context */
+ if (!uobj)
+ return -ENOMEM;
+
+ uobj->context = file->ucontext;
+
+ pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, /* driver method is called directly, so the generic fields are filled in below */
+ file->ucontext, &udata);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ goto err;
+ }
+
+ pd->device = file->device->ib_dev;
+ pd->uobject = uobj;
+ atomic_set(&pd->usecnt, 0);
+
+retry: /* idr_pre_get + idr_get_new is repeated for as long as idr_get_new returns -EAGAIN */
+ if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_pd;
+ }
+
+ down(&ib_uverbs_idr_mutex); /* the mutex is held only around the idr insertion itself */
+ ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_pd;
+
+ spin_lock_irq(&file->ucontext->lock); /* the context lock protects its per-type object lists */
+ list_add_tail(&uobj->list, &file->ucontext->pd_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ memset(&resp, 0, sizeof resp); /* zero first so fields not set below are copied out as 0 */
+ resp.pd_handle = uobj->id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ return in_len; /* success is reported as the number of command bytes consumed */
+
+err_list: /* labels unwind in reverse order of setup: list entry and idr handle, then the PD, then uobj */
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ down(&ib_uverbs_idr_mutex);
+ idr_remove(&ib_uverbs_pd_idr, uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+err_pd:
+ ib_dealloc_pd(pd);
+
+err:
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{ /* deallocates the PD named by cmd.pd_handle and drops its handle, list entry and uobject */
+ struct ib_uverbs_dealloc_pd cmd;
+ struct ib_pd *pd;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL; /* result when the handle is unknown or belongs to another context */
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex); /* held across lookup, dealloc and idr removal */
+
+ pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+ if (!pd || pd->uobject->context != file->ucontext) /* handles are only honoured for the context that created them */
+ goto out;
+
+ uobj = pd->uobject; /* saved before the dealloc call, after which pd is no longer dereferenced */
+
+ ret = ib_dealloc_pd(pd);
+ if (ret) /* on failure the handle, list entry and uobj are all left in place */
+ goto out;
+
+ idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len; /* ret is 0 here on success, which is reported as in_len */
+}
+
+/*
+ * REG_MR: set up a umem for a range of user memory (via ib_umem_get)
+ * and register it with the device as a memory region on the given
+ * protection domain.
+ *
+ * cmd.start/cmd.length describe the user buffer and cmd.hca_va is
+ * stored as the umem's virt_base; both addresses must have the same
+ * offset within a page.  On success the new MR is linked on the
+ * context's mr_list and its handle, lkey and rkey are copied to
+ * cmd.response.
+ *
+ * Returns in_len on success, -ENOSPC if out_len cannot hold the
+ * response, -EFAULT on a failed user copy, -EINVAL for mismatched
+ * page offsets or a PD handle that is unknown or owned by another
+ * context, -ENOSYS if the driver has no reg_user_mr method, -ENOMEM on
+ * allocation failure, or an error from ib_umem_get(), reg_user_mr()
+ * or idr_get_new().
+ */
+ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
+			 const char __user *buf, int in_len,
+			 int out_len)
+{
+	struct ib_uverbs_reg_mr cmd;
+	struct ib_uverbs_reg_mr_resp resp;
+	struct ib_udata udata;
+	struct ib_umem_object *obj;
+	struct ib_pd *pd;
+	struct ib_mr *mr;
+	int ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	INIT_UDATA(&udata, buf + sizeof cmd,
+		   (unsigned long) cmd.response + sizeof resp,
+		   in_len - sizeof cmd, out_len - sizeof resp);
+
+	if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+		return -EINVAL;
+
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	obj->uobject.context = file->ucontext;
+
+	/*
+	 * We ask for writable memory if any access flags other than
+	 * "remote read" are set.  "Local write" and "remote write"
+	 * obviously require write access.  "Remote atomic" can do
+	 * things like fetch and add, which will modify memory, and
+	 * "MW bind" can change permissions by binding a window.
+	 */
+	ret = ib_umem_get(file->device->ib_dev, &obj->umem,
+			  (void *) (unsigned long) cmd.start, cmd.length,
+			  !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
+	if (ret)
+		goto err_free;
+
+	obj->umem.virt_base = cmd.hca_va;
+
+	/*
+	 * The idr mutex is held from the PD lookup until the handle has
+	 * been published (or torn down again), so the PD cannot be
+	 * deallocated through its handle while the MR is being created.
+	 */
+	down(&ib_uverbs_idr_mutex);
+
+	pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+	if (!pd || pd->uobject->context != file->ucontext) {
+		ret = -EINVAL;
+		goto err_up;
+	}
+
+	if (!pd->device->reg_user_mr) {
+		ret = -ENOSYS;
+		goto err_up;
+	}
+
+	mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
+	if (IS_ERR(mr)) {
+		ret = PTR_ERR(mr);
+		goto err_up;
+	}
+
+	/* The driver method was called directly; fill in the generic fields. */
+	mr->device = pd->device;
+	mr->pd = pd;
+	mr->uobject = &obj->uobject;
+	atomic_inc(&pd->usecnt);
+	atomic_set(&mr->usecnt, 0);
+
+	memset(&resp, 0, sizeof resp);
+	resp.lkey = mr->lkey;
+	resp.rkey = mr->rkey;
+
+retry:
+	if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto err_unreg;
+	}
+
+	ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id);
+
+	if (ret == -EAGAIN)
+		goto retry;
+	if (ret)
+		goto err_unreg;
+
+	resp.mr_handle = obj->uobject.id;
+
+	spin_lock_irq(&file->ucontext->lock);
+	list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+	spin_unlock_irq(&file->ucontext->lock);
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto err_list;
+	}
+
+	up(&ib_uverbs_idr_mutex);
+
+	return in_len;
+
+err_list:
+	spin_lock_irq(&file->ucontext->lock);
+	list_del(&obj->uobject.list);
+	spin_unlock_irq(&file->ucontext->lock);
+
+	/*
+	 * Drop the handle before the MR and obj go away; otherwise the
+	 * idr would keep handing out a dangling pointer for it.  The idr
+	 * mutex is still held at this point.
+	 */
+	idr_remove(&ib_uverbs_mr_idr, obj->uobject.id);
+
+err_unreg:
+	ib_dereg_mr(mr);
+
+err_up:
+	up(&ib_uverbs_idr_mutex);
+
+	ib_umem_release(file->device->ib_dev, &obj->umem);
+
+err_free:
+	kfree(obj);
+	return ret;
+}
+
+ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{ /* deregisters the MR named by cmd.mr_handle, then releases its umem and bookkeeping object */
+ struct ib_uverbs_dereg_mr cmd;
+ struct ib_mr *mr;
+ struct ib_umem_object *memobj;
+ int ret = -EINVAL; /* result when the handle is unknown or belongs to another context */
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex); /* held across lookup, dereg and idr removal */
+
+ mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle);
+ if (!mr || mr->uobject->context != file->ucontext) /* handles are only honoured for the context that created them */
+ goto out;
+
+ memobj = container_of(mr->uobject, struct ib_umem_object, uobject); /* reg_mr embeds the uobject inside an ib_umem_object */
+
+ ret = ib_dereg_mr(mr);
+ if (ret) /* on failure the handle, list entry and umem are all left in place */
+ goto out;
+
+ idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&memobj->uobject.list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ ib_umem_release(file->device->ib_dev, &memobj->umem);
+ kfree(memobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len; /* ret is 0 here on success, which is reported as in_len */
+}
+
+ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{ /* creates a CQ through the driver, gives it an idr handle, and returns the handle and actual cqe count */
+ struct ib_uverbs_create_cq cmd;
+ struct ib_uverbs_create_cq_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_cq *cq;
+ int ret;
+
+ if (out_len < sizeof resp) /* response buffer must be able to hold the whole resp */
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd, /* the driver sees only the bytes after the fixed cmd and resp structures */
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ if (cmd.event_handler >= file->device->num_comp) /* range-checked here but not otherwise used in this function */
+ return -EINVAL;
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ uobj->user_handle = cmd.user_handle; /* value supplied by userspace, stored for later use outside this function */
+ uobj->context = file->ucontext;
+
+ cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, /* driver method is called directly, so the generic fields are filled in below */
+ file->ucontext, &udata);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto err;
+ }
+
+ cq->device = file->device->ib_dev;
+ cq->uobject = uobj;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = file; /* the handlers receive this uverbs file as their context pointer */
+ atomic_set(&cq->usecnt, 0);
+
+retry: /* idr_pre_get + idr_get_new is repeated for as long as idr_get_new returns -EAGAIN */
+ if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_cq;
+ }
+
+ down(&ib_uverbs_idr_mutex); /* the mutex is held only around the idr insertion itself */
+ ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_cq;
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_add_tail(&uobj->list, &file->ucontext->cq_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ memset(&resp, 0, sizeof resp); /* zero first so fields not set below are copied out as 0 */
+ resp.cq_handle = uobj->id;
+ resp.cqe = cq->cqe; /* taken from the created CQ rather than echoed from cmd.cqe */
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ return in_len; /* success is reported as the number of command bytes consumed */
+
+err_list: /* labels unwind in reverse order of setup: list entry and idr handle, then the CQ, then uobj */
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ down(&ib_uverbs_idr_mutex);
+ idr_remove(&ib_uverbs_cq_idr, uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+err_cq:
+ ib_destroy_cq(cq);
+
+err:
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{ /* destroys the CQ named by cmd.cq_handle and drops its handle, list entry and uobject */
+ struct ib_uverbs_destroy_cq cmd;
+ struct ib_cq *cq;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL; /* result when the handle is unknown or belongs to another context */
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex); /* held across lookup, destroy and idr removal */
+
+ cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+ if (!cq || cq->uobject->context != file->ucontext) /* handles are only honoured for the context that created them */
+ goto out;
+
+ uobj = cq->uobject; /* saved before the destroy call, after which cq is no longer dereferenced */
+
+ ret = ib_destroy_cq(cq);
+ if (ret) /* on failure the handle, list entry and uobj are all left in place */
+ goto out;
+
+ idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len; /* ret is 0 here on success, which is reported as in_len */
+}
+
+/*
+ * CREATE_QP: create a queue pair on the given PD with the given send
+ * and receive CQs, publish it under a new handle, and return the
+ * handle and QP number in cmd.response.
+ *
+ * The PD and both CQ handles must belong to this file's context.  The
+ * driver's create_qp method is called directly, so the generic
+ * struct ib_qp fields and the PD/CQ use counts are set up here.
+ *
+ * Returns in_len on success, -ENOSPC if out_len cannot hold the
+ * response, -EFAULT on a failed user copy, -EINVAL for a handle that
+ * is unknown or owned by another context, -ENOMEM on allocation
+ * failure, or an error from create_qp() or idr_get_new().
+ */
+ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_create_qp cmd;
+	struct ib_uverbs_create_qp_resp resp;
+	struct ib_udata udata;
+	struct ib_uobject *uobj;
+	struct ib_pd *pd;
+	struct ib_cq *scq, *rcq;
+	struct ib_qp *qp;
+	struct ib_qp_init_attr attr;
+	int ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	INIT_UDATA(&udata, buf + sizeof cmd,
+		   (unsigned long) cmd.response + sizeof resp,
+		   in_len - sizeof cmd, out_len - sizeof resp);
+
+	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+	if (!uobj)
+		return -ENOMEM;
+
+	/*
+	 * The idr mutex is held from the handle lookups until the QP
+	 * handle has been published (or torn down again), so the PD and
+	 * CQs cannot be destroyed through their handles meanwhile.
+	 */
+	down(&ib_uverbs_idr_mutex);
+
+	pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+	scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle);
+	rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle);
+
+	if (!pd || pd->uobject->context != file->ucontext ||
+	    !scq || scq->uobject->context != file->ucontext ||
+	    !rcq || rcq->uobject->context != file->ucontext) {
+		ret = -EINVAL;
+		goto err_up;
+	}
+
+	attr.event_handler = ib_uverbs_qp_event_handler;
+	attr.qp_context = file;
+	attr.send_cq = scq;
+	attr.recv_cq = rcq;
+	attr.srq = NULL;
+	attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+	attr.qp_type = cmd.qp_type;
+
+	attr.cap.max_send_wr = cmd.max_send_wr;
+	attr.cap.max_recv_wr = cmd.max_recv_wr;
+	attr.cap.max_send_sge = cmd.max_send_sge;
+	attr.cap.max_recv_sge = cmd.max_recv_sge;
+	attr.cap.max_inline_data = cmd.max_inline_data;
+
+	uobj->user_handle = cmd.user_handle;
+	uobj->context = file->ucontext;
+
+	qp = pd->device->create_qp(pd, &attr, &udata);
+	if (IS_ERR(qp)) {
+		ret = PTR_ERR(qp);
+		goto err_up;
+	}
+
+	qp->device = pd->device;
+	qp->pd = pd;
+	qp->send_cq = attr.send_cq;
+	qp->recv_cq = attr.recv_cq;
+	qp->srq = attr.srq;
+	qp->uobject = uobj;
+	qp->event_handler = attr.event_handler;
+	qp->qp_context = attr.qp_context;
+	qp->qp_type = attr.qp_type;
+	atomic_inc(&pd->usecnt);
+	atomic_inc(&attr.send_cq->usecnt);
+	atomic_inc(&attr.recv_cq->usecnt);
+	if (attr.srq)
+		atomic_inc(&attr.srq->usecnt);
+
+	memset(&resp, 0, sizeof resp);
+	resp.qpn = qp->qp_num;
+
+retry:
+	if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto err_destroy;
+	}
+
+	ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id);
+
+	if (ret == -EAGAIN)
+		goto retry;
+	if (ret)
+		goto err_destroy;
+
+	resp.qp_handle = uobj->id;
+
+	spin_lock_irq(&file->ucontext->lock);
+	list_add_tail(&uobj->list, &file->ucontext->qp_list);
+	spin_unlock_irq(&file->ucontext->lock);
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto err_list;
+	}
+
+	up(&ib_uverbs_idr_mutex);
+
+	return in_len;
+
+err_list:
+	spin_lock_irq(&file->ucontext->lock);
+	list_del(&uobj->list);
+	spin_unlock_irq(&file->ucontext->lock);
+
+	/*
+	 * Drop the handle before the QP and uobj go away; otherwise the
+	 * idr would keep handing out a dangling pointer for it.  The idr
+	 * mutex is still held at this point.
+	 */
+	idr_remove(&ib_uverbs_qp_idr, uobj->id);
+
+err_destroy:
+	ib_destroy_qp(qp);
+
+err_up:
+	up(&ib_uverbs_idr_mutex);
+
+	kfree(uobj);
+	return ret;
+}
+
+/*
+ * MODIFY_QP: translate the userspace command into a struct ib_qp_attr
+ * and apply it to the QP named by cmd.qp_handle with ib_modify_qp().
+ *
+ * Every attribute field is copied from the command unconditionally;
+ * cmd.attr_mask is passed on to ib_modify_qp() to say which of them
+ * are meant to be applied.  The attr structure is heap-allocated
+ * rather than placed on the stack.
+ *
+ * Returns in_len on success, -EFAULT on a failed user copy, -ENOMEM
+ * if attr cannot be allocated, -EINVAL for a handle that is unknown
+ * or owned by another context, or the error from ib_modify_qp().
+ */
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_modify_qp cmd;
+	struct ib_qp *qp;
+	struct ib_qp_attr *attr;
+	int ret;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	attr = kmalloc(sizeof *attr, GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	down(&ib_uverbs_idr_mutex);
+
+	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+	if (!qp || qp->uobject->context != file->ucontext) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	attr->qp_state = cmd.qp_state;
+	attr->cur_qp_state = cmd.cur_qp_state;
+	attr->path_mtu = cmd.path_mtu;
+	attr->path_mig_state = cmd.path_mig_state;
+	attr->qkey = cmd.qkey;
+	attr->rq_psn = cmd.rq_psn;
+	attr->sq_psn = cmd.sq_psn;
+	attr->dest_qp_num = cmd.dest_qp_num;
+	attr->qp_access_flags = cmd.qp_access_flags;
+	attr->pkey_index = cmd.pkey_index;
+	/* The alternate path has its own P_Key index in the command. */
+	attr->alt_pkey_index = cmd.alt_pkey_index;
+	attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
+	attr->max_rd_atomic = cmd.max_rd_atomic;
+	attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
+	attr->min_rnr_timer = cmd.min_rnr_timer;
+	attr->port_num = cmd.port_num;
+	attr->timeout = cmd.timeout;
+	attr->retry_cnt = cmd.retry_cnt;
+	attr->rnr_retry = cmd.rnr_retry;
+	attr->alt_port_num = cmd.alt_port_num;
+	attr->alt_timeout = cmd.alt_timeout;
+
+	/* Primary path address vector. */
+	memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
+	attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
+	attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
+	attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
+	attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
+	attr->ah_attr.dlid = cmd.dest.dlid;
+	attr->ah_attr.sl = cmd.dest.sl;
+	attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
+	attr->ah_attr.static_rate = cmd.dest.static_rate;
+	attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
+	attr->ah_attr.port_num = cmd.dest.port_num;
+
+	/* Alternate path address vector. */
+	memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
+	attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
+	attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
+	attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
+	attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
+	attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
+	attr->alt_ah_attr.sl = cmd.alt_dest.sl;
+	attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
+	attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
+	attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
+	attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+
+	ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+	if (ret)
+		goto out;
+
+	ret = in_len;
+
+out:
+	up(&ib_uverbs_idr_mutex);
+	kfree(attr);
+
+	return ret;
+}
+
+ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{ /* destroys the QP named by cmd.qp_handle and drops its handle, list entry and uobject */
+ struct ib_uverbs_destroy_qp cmd;
+ struct ib_qp *qp;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL; /* result when the handle is unknown or belongs to another context */
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex); /* held across lookup, destroy and idr removal */
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (!qp || qp->uobject->context != file->ucontext) /* handles are only honoured for the context that created them */
+ goto out;
+
+ uobj = qp->uobject; /* saved before the destroy call, after which qp is no longer dereferenced */
+
+ ret = ib_destroy_qp(qp);
+ if (ret) /* on failure the handle, list entry and uobj are all left in place */
+ goto out;
+
+ idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len; /* ret is 0 here on success, which is reported as in_len */
+}
+
+ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{ /* attaches the QP named by cmd.qp_handle to the multicast group given by cmd.gid and cmd.mlid */
+ struct ib_uverbs_attach_mcast cmd;
+ struct ib_qp *qp;
+ int ret = -EINVAL; /* result when the handle is unknown or belongs to another context */
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex); /* held across the lookup and the attach call */
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (qp && qp->uobject->context == file->ucontext) /* handles are only honoured for the context that created them */
+ ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); /* cmd.gid is passed on viewed as a union ib_gid */
+
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len; /* a zero result from the attach call is reported as in_len */
+}
+
+ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{ /* detaches the QP named by cmd.qp_handle from the multicast group given by cmd.gid and cmd.mlid */
+ struct ib_uverbs_detach_mcast cmd;
+ struct ib_qp *qp;
+ int ret = -EINVAL; /* result when the handle is unknown or belongs to another context */
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex); /* held across the lookup and the detach call */
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (qp && qp->uobject->context == file->ucontext) /* handles are only honoured for the context that created them */
+ ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); /* cmd.gid is passed on viewed as a union ib_gid */
+
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len; /* a zero result from the detach call is reported as in_len */
+}
--- /dev/null 2005-06-23 14:14:38.423479552 -0700
+++ linux/drivers/infiniband/core/uverbs_main.c 2005-06-28 15:20:04.363963991 -0700
@@ -0,0 +1,708 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+
+#include <asm/uaccess.h>
+
+#include "uverbs.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand userspace verbs access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+/* Superblock magic passed to get_sb_pseudo() for the event pseudo-filesystem. */
+#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */
+
+/*
+ * Character device numbering: the major number, the first minor used,
+ * and the size of the minor range (also the size of dev_map).
+ */
+enum {
+ IB_UVERBS_MAJOR = 231,
+ IB_UVERBS_BASE_MINOR = 192,
+ IB_UVERBS_MAX_DEVICES = 32
+};
+
+/* First dev_t of the range registered in ib_uverbs_init(). */
+#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+
+/*
+ * Handle tables, one idr per object type.  ib_uverbs_idr_mutex is held
+ * around the idr lookups and removals done in this file.
+ */
+DECLARE_MUTEX(ib_uverbs_idr_mutex);
+DEFINE_IDR(ib_uverbs_pd_idr);
+DEFINE_IDR(ib_uverbs_mr_idr);
+DEFINE_IDR(ib_uverbs_mw_idr);
+DEFINE_IDR(ib_uverbs_ah_idr);
+DEFINE_IDR(ib_uverbs_cq_idr);
+DEFINE_IDR(ib_uverbs_qp_idr);
+
+/*
+ * Bitmap of device numbers in use; map_lock is taken while a free bit
+ * is searched for and claimed in ib_uverbs_add_one().
+ */
+static spinlock_t map_lock;
+static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
+
+/*
+ * Command dispatch table, indexed by the command number taken from the
+ * header that userspace writes (see ib_uverbs_write()).  Each handler
+ * receives the file, a pointer just past the header, and the input and
+ * output lengths in bytes.
+ */
+static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len) = {
+ [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params,
+ [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
+ [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
+ [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
+ [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid,
+ [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey,
+ [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
+ [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
+ [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
+ [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
+ [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
+ [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
+ [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
+};
+
+/* Internal mount of the event filesystem; set up in ib_uverbs_init(). */
+static struct vfsmount *uverbs_event_mnt;
+
+/* Forward declarations for the callbacks referenced by uverbs_client. */
+static void ib_uverbs_add_one(struct ib_device *device);
+static void ib_uverbs_remove_one(struct ib_device *device);
+
+/*
+ * Destroy every object still attached to a userspace context and then
+ * release the context itself.
+ *
+ * With ib_uverbs_idr_mutex held, walks the context's QP, CQ, MR and PD
+ * lists in that order.  For each entry the handle is removed from the
+ * matching idr, the verbs object is destroyed, and the tracking
+ * uobject is unlinked and freed.  AHs, SRQs and MWs are not handled
+ * yet (see the XXX markers).
+ *
+ * Returns 0 if context is NULL, otherwise the result of the device's
+ * dealloc_ucontext method.
+ */
+static int ib_dealloc_ucontext(struct ib_ucontext *context)
+{
+	struct ib_uobject *uobj, *tmp;
+
+	if (!context)
+		return 0;
+
+	down(&ib_uverbs_idr_mutex);
+
+	/* XXX Free AHs */
+
+	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
+		struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
+		idr_remove(&ib_uverbs_qp_idr, uobj->id);
+		ib_destroy_qp(qp);
+		list_del(&uobj->list);
+		kfree(uobj);
+	}
+
+	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
+		struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
+		idr_remove(&ib_uverbs_cq_idr, uobj->id);
+		ib_destroy_cq(cq);
+		list_del(&uobj->list);
+		kfree(uobj);
+	}
+
+	/* XXX Free SRQs */
+	/* XXX Free MWs */
+
+	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
+		struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id);
+		/*
+		 * Remember the device now: mr must not be dereferenced
+		 * once ib_dereg_mr() has destroyed it.
+		 */
+		struct ib_device *mrdev = mr->device;
+		struct ib_umem_object *memobj;
+
+		idr_remove(&ib_uverbs_mr_idr, uobj->id);
+		ib_dereg_mr(mr);
+
+		/* The MR's uobject is embedded in a larger ib_umem_object. */
+		memobj = container_of(uobj, struct ib_umem_object, uobject);
+		ib_umem_release_on_close(mrdev, &memobj->umem);
+
+		list_del(&uobj->list);
+		kfree(memobj);
+	}
+
+	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
+		struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id);
+		idr_remove(&ib_uverbs_pd_idr, uobj->id);
+		ib_dealloc_pd(pd);
+		list_del(&uobj->list);
+		kfree(uobj);
+	}
+
+	up(&ib_uverbs_idr_mutex);
+
+	return context->device->dealloc_ucontext(context);
+}
+
+/*
+ * kref release callback for struct ib_uverbs_file: drops the module
+ * reference on the underlying IB device's owner and frees the file.
+ */
+static void ib_uverbs_release_file(struct kref *ref)
+{
+	struct ib_uverbs_file *uverbs_file;
+
+	uverbs_file = container_of(ref, struct ib_uverbs_file, ref);
+
+	module_put(uverbs_file->device->ib_dev->owner);
+	kfree(uverbs_file);
+}
+
+/*
+ * read() handler for an event file: hands one queued event descriptor
+ * to userspace.
+ *
+ * Blocks until an event is queued or the event file has been released
+ * (fd < 0), unless O_NONBLOCK is set.  The oldest event is dequeued
+ * only if the user buffer is large enough to hold its descriptor.
+ *
+ * Returns the descriptor size on success, -EAGAIN when non-blocking
+ * and nothing is queued, -ERESTARTSYS if the wait is interrupted,
+ * -ENODEV once the file has been released, -EINVAL if count is too
+ * small, or -EFAULT if the copy to userspace fails (the event is
+ * still consumed in that case).
+ */
+static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
+				    size_t count, loff_t *pos)
+{
+	struct ib_uverbs_event_file *file = filp->private_data;
+	struct list_head *first;
+	void *event;
+	int eventsz;
+	int ret;
+
+	spin_lock_irq(&file->lock);
+
+	for (;;) {
+		if (!list_empty(&file->event_list) || file->fd < 0)
+			break;
+
+		/* Nothing to deliver yet: drop the lock before sleeping. */
+		spin_unlock_irq(&file->lock);
+
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		if (wait_event_interruptible(file->poll_wait,
+					     !list_empty(&file->event_list) ||
+					     file->fd < 0))
+			return -ERESTARTSYS;
+
+		spin_lock_irq(&file->lock);
+	}
+
+	if (file->fd < 0) {
+		spin_unlock_irq(&file->lock);
+		return -ENODEV;
+	}
+
+	first = file->event_list.next;
+
+	if (file->is_async) {
+		event   = list_entry(first, struct ib_uverbs_async_event, list);
+		eventsz = sizeof (struct ib_uverbs_async_event_desc);
+	} else {
+		event   = list_entry(first, struct ib_uverbs_comp_event, list);
+		eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+	}
+
+	if (eventsz > count) {
+		/* Buffer too small: leave the event on the queue. */
+		spin_unlock_irq(&file->lock);
+		return -EINVAL;
+	}
+
+	list_del(first);
+
+	spin_unlock_irq(&file->lock);
+
+	ret = copy_to_user(buf, event, eventsz) ? -EFAULT : eventsz;
+
+	kfree(event);
+
+	return ret;
+}
+
+/*
+ * poll() handler for an event file.  Reports POLLERR once the file has
+ * been released (fd < 0), POLLIN | POLLRDNORM when at least one event
+ * is queued, and 0 otherwise.
+ */
+static unsigned int ib_uverbs_event_poll(struct file *filp,
+					 struct poll_table_struct *wait)
+{
+	struct ib_uverbs_event_file *file = filp->private_data;
+	unsigned int mask;
+
+	poll_wait(filp, &file->poll_wait, wait);
+
+	spin_lock_irq(&file->lock);
+	if (file->fd < 0)
+		mask = POLLERR;
+	else if (list_empty(&file->event_list))
+		mask = 0;
+	else
+		mask = POLLIN | POLLRDNORM;
+	spin_unlock_irq(&file->lock);
+
+	return mask;
+}
+
+/*
+ * Mark an event file as released and discard its pending events.
+ *
+ * Under file->lock, sets fd to -1 (which readers and poll treat as
+ * "gone") and frees every queued event.  Calling it again on a file
+ * whose fd is already -1 does nothing.
+ *
+ * Each event is unlinked before it is freed so that event_list is left
+ * as a valid empty list; the completion and async handlers may still
+ * call list_add_tail() on it afterwards.
+ */
+static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
+{
+	struct list_head *entry, *tmp;
+
+	spin_lock_irq(&file->lock);
+	if (file->fd != -1) {
+		file->fd = -1;
+		list_for_each_safe(entry, tmp, &file->event_list) {
+			list_del(entry);
+			if (file->is_async)
+				kfree(list_entry(entry, struct ib_uverbs_async_event, list));
+			else
+				kfree(list_entry(entry, struct ib_uverbs_comp_event, list));
+		}
+	}
+	spin_unlock_irq(&file->lock);
+}
+
+/*
+ * release() handler for an event file: discards pending events and
+ * drops this event file's reference on the owning uverbs file.
+ * Always returns 0.
+ */
+static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+{
+	struct ib_uverbs_event_file *ev_file = filp->private_data;
+	struct ib_uverbs_file *owner;
+
+	ib_uverbs_event_release(ev_file);
+
+	owner = ev_file->uverbs_file;
+	kref_put(&owner->ref, ib_uverbs_release_file);
+
+	return 0;
+}
+
+/*
+ * File operations for the event files created by ib_uverbs_event_init().
+ * They are read-only: only read, poll and release are provided.
+ */
+static struct file_operations uverbs_event_fops = {
+ /*
+ * No .owner field since we artificially create event files,
+ * so there is no increment to the module reference count in
+ * the open path. All event files come from a uverbs command
+ * file, which already takes a module reference, so this is OK.
+ */
+ .read = ib_uverbs_event_read,
+ .poll = ib_uverbs_event_poll,
+ .release = ib_uverbs_event_close
+};
+
+/*
+ * CQ completion callback: queues a completion event carrying the CQ's
+ * user handle on the file's first completion event file and wakes any
+ * waiter.  The event is silently dropped if the atomic allocation
+ * fails.
+ */
+void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+	struct ib_uverbs_file *file = cq_context;
+	struct ib_uverbs_event_file *ev_file = &file->comp_file[0];
+	struct ib_uverbs_comp_event *entry;
+	unsigned long flags;
+
+	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+	if (entry == NULL)
+		return;
+
+	entry->desc.cq_handle = cq->uobject->user_handle;
+
+	spin_lock_irqsave(&ev_file->lock, flags);
+	list_add_tail(&entry->list, &ev_file->event_list);
+	spin_unlock_irqrestore(&ev_file->lock, flags);
+
+	wake_up_interruptible(&ev_file->poll_wait);
+}
+
+/*
+ * Queue an asynchronous event (element + event type) on the file's
+ * async event file and wake any waiter.  The event is silently dropped
+ * if the atomic allocation fails.
+ */
+static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
+				    __u64 element, __u64 event)
+{
+	struct ib_uverbs_event_file *ev_file = &file->async_file;
+	struct ib_uverbs_async_event *entry;
+	unsigned long flags;
+
+	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+	if (entry == NULL)
+		return;
+
+	entry->desc.element    = element;
+	entry->desc.event_type = event;
+
+	spin_lock_irqsave(&ev_file->lock, flags);
+	list_add_tail(&entry->list, &ev_file->event_list);
+	spin_unlock_irqrestore(&ev_file->lock, flags);
+
+	wake_up_interruptible(&ev_file->poll_wait);
+}
+
+/*
+ * CQ async event callback: forwards the event to the async event file,
+ * identifying the CQ by its user handle.  context_ptr is the owning
+ * struct ib_uverbs_file.
+ */
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+{
+	struct ib_uverbs_file *file = context_ptr;
+
+	ib_uverbs_async_handler(file,
+				event->element.cq->uobject->user_handle,
+				event->event);
+}
+
+/*
+ * QP async event callback: forwards the event to the async event file,
+ * identifying the QP by its user handle.  context_ptr is the owning
+ * struct ib_uverbs_file.
+ */
+void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
+{
+	struct ib_uverbs_file *file = context_ptr;
+
+	ib_uverbs_async_handler(file,
+				event->element.qp->uobject->user_handle,
+				event->event);
+}
+
+/*
+ * Device-level async event callback registered in ib_uverbs_open():
+ * recovers the uverbs file from its embedded handler and forwards the
+ * event with the port number as the element.
+ */
+static void ib_uverbs_event_handler(struct ib_event_handler *handler,
+				    struct ib_event *event)
+{
+	struct ib_uverbs_file *file;
+
+	file = container_of(handler, struct ib_uverbs_file, event_handler);
+
+	ib_uverbs_async_handler(file, event->element.port_num, event->event);
+}
+
+/*
+ * Initialize an event file and install it in the caller's fd table.
+ *
+ * Sets up the lock, event list and wait queue, reserves an unused fd,
+ * and builds a read-only struct file by hand that uses
+ * uverbs_event_fops and points at the root of the internal event
+ * mount.  The new fd is recorded in file->fd.
+ *
+ * Returns 0 on success, the negative value from get_unused_fd() if no
+ * fd is available, or -ENFILE if no struct file can be obtained.
+ */
+static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,
+ struct ib_uverbs_file *uverbs_file)
+{
+ struct file *filp;
+
+ spin_lock_init(&file->lock);
+ INIT_LIST_HEAD(&file->event_list);
+ init_waitqueue_head(&file->poll_wait);
+ file->uverbs_file = uverbs_file;
+
+ file->fd = get_unused_fd();
+ if (file->fd < 0)
+ return file->fd;
+
+ filp = get_empty_filp();
+ if (!filp) {
+ /* Give the reserved descriptor back before failing. */
+ put_unused_fd(file->fd);
+ return -ENFILE;
+ }
+
+ /* Each event file takes its own reference on the mount and its root dentry. */
+ filp->f_op = &uverbs_event_fops;
+ filp->f_vfsmnt = mntget(uverbs_event_mnt);
+ filp->f_dentry = dget(uverbs_event_mnt->mnt_root);
+ filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
+ filp->f_flags = O_RDONLY;
+ filp->f_mode = FMODE_READ;
+ filp->private_data = file;
+
+ fd_install(file->fd, filp);
+
+ return 0;
+}
+
+/*
+ * write() handler for the uverbs command file: validates the command
+ * header and dispatches to the handler in uverbs_cmd_table.
+ *
+ * The write must be exactly hdr.in_words * 4 bytes long, header
+ * included.  Before a context has been created only QUERY_PARAMS and
+ * GET_CONTEXT are accepted.  Handlers are given a pointer just past
+ * the header together with the input and output lengths in bytes.
+ *
+ * Returns the handler's result, -EFAULT if the header cannot be
+ * copied in, or -EINVAL for a short write, a length mismatch, an
+ * unknown command, or a command not permitted without a context.
+ */
+static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct ib_uverbs_file *file = filp->private_data;
+	struct ib_uverbs_cmd_hdr hdr;
+
+	if (count < sizeof hdr)
+		return -EINVAL;
+
+	if (copy_from_user(&hdr, buf, sizeof hdr))
+		return -EFAULT;
+
+	if (hdr.in_words * 4 != count)
+		return -EINVAL;
+
+	/*
+	 * The table uses designated initializers, so any command number
+	 * without a handler is a NULL slot; reject it rather than
+	 * calling through a NULL pointer.
+	 */
+	if (hdr.command < 0 ||
+	    hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
+	    !uverbs_cmd_table[hdr.command])
+		return -EINVAL;
+
+	if (!file->ucontext &&
+	    hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
+	    hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
+		return -EINVAL;
+
+	return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
+					     hdr.in_words * 4, hdr.out_words * 4);
+}
+
+/*
+ * mmap() handler: passes the request to the IB device's mmap method.
+ * Returns -ENODEV if no userspace context has been created on this
+ * file yet.
+ */
+static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct ib_uverbs_file *file = filp->private_data;
+
+	if (file->ucontext)
+		return file->device->ib_dev->mmap(file->ucontext, vma);
+
+	return -ENODEV;
+}
+
+/*
+ * open() handler for the uverbs command device.
+ *
+ * Takes a module reference on the IB device's owner, allocates the
+ * per-open file (with room for num_comp completion event files),
+ * creates the async event file and the completion event files, and
+ * registers a device event handler.  Every event file that is set up
+ * successfully takes its own reference on the uverbs file; that
+ * reference is dropped in ib_uverbs_event_close().
+ *
+ * Returns 0 on success, -ENODEV if the module reference cannot be
+ * taken, -ENOMEM if allocation fails, or the error from
+ * ib_uverbs_event_init() / ib_register_event_handler().
+ */
+static int ib_uverbs_open(struct inode *inode, struct file *filp)
+{
+	struct ib_uverbs_device *dev =
+		container_of(inode->i_cdev, struct ib_uverbs_device, dev);
+	struct ib_uverbs_file *file;
+	int i = 0;
+	int ret;
+
+	if (!try_module_get(dev->ib_dev->owner))
+		return -ENODEV;
+
+	file = kmalloc(sizeof *file +
+		       (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
+		       GFP_KERNEL);
+	if (!file) {
+		/*
+		 * There is no file whose release would drop the module
+		 * reference for us, so drop it here.
+		 */
+		module_put(dev->ib_dev->owner);
+		return -ENOMEM;
+	}
+
+	file->device = dev;
+	kref_init(&file->ref);
+
+	file->ucontext = NULL;
+
+	ret = ib_uverbs_event_init(&file->async_file, file);
+	if (ret)
+		goto err;
+
+	file->async_file.is_async = 1;
+
+	kref_get(&file->ref);
+
+	for (i = 0; i < dev->num_comp; ++i) {
+		ret = ib_uverbs_event_init(&file->comp_file[i], file);
+		if (ret)
+			goto err_async;
+		kref_get(&file->ref);
+		file->comp_file[i].is_async = 0;
+	}
+
+	filp->private_data = file;
+
+	INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
+			      ib_uverbs_event_handler);
+	/* Keep the error code so that a failure here is not reported as 0. */
+	ret = ib_register_event_handler(&file->event_handler);
+	if (ret)
+		goto err_async;
+
+	return 0;
+
+err_async:
+	/* Release only the completion event files that were initialized. */
+	while (i--)
+		ib_uverbs_event_release(&file->comp_file[i]);
+
+	ib_uverbs_event_release(&file->async_file);
+
+err:
+	kref_put(&file->ref, ib_uverbs_release_file);
+
+	return ret;
+}
+
+/*
+ * release() handler for the uverbs command device.
+ *
+ * Unregisters the device event handler, releases the async event file,
+ * destroys the userspace context and everything attached to it,
+ * releases the completion event files, and finally drops the command
+ * file's own reference on the uverbs file.  Always returns 0.
+ */
+static int ib_uverbs_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+ int i;
+
+ ib_unregister_event_handler(&file->event_handler);
+ ib_uverbs_event_release(&file->async_file);
+ /* Handles a NULL ucontext (no context was ever created). */
+ ib_dealloc_ucontext(file->ucontext);
+
+ for (i = 0; i < file->device->num_comp; ++i)
+ ib_uverbs_event_release(&file->comp_file[i]);
+
+ kref_put(&file->ref, ib_uverbs_release_file);
+
+ return 0;
+}
+
+/* File operations used for devices that do not provide an mmap method. */
+static struct file_operations uverbs_fops = {
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .open = ib_uverbs_open,
+ .release = ib_uverbs_close
+};
+
+/* Same as uverbs_fops, plus mmap; chosen in ib_uverbs_add_one(). */
+static struct file_operations uverbs_mmap_fops = {
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .mmap = ib_uverbs_mmap,
+ .open = ib_uverbs_open,
+ .release = ib_uverbs_close
+};
+
+/* IB client registered in ib_uverbs_init(), with its add/remove callbacks. */
+static struct ib_client uverbs_client = {
+ .name = "uverbs",
+ .add = ib_uverbs_add_one,
+ .remove = ib_uverbs_remove_one
+};
+
+/* sysfs "dev" attribute: prints the device number of the uverbs cdev. */
+static ssize_t show_dev(struct class_device *class_dev, char *buf)
+{
+	struct ib_uverbs_device *uverbs_dev;
+
+	uverbs_dev = container_of(class_dev, struct ib_uverbs_device, class_dev);
+
+	return print_dev_t(buf, uverbs_dev->dev.dev);
+}
+static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
+
+/* sysfs "ibdev" attribute: prints the name of the underlying IB device. */
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
+{
+	struct ib_uverbs_device *uverbs_dev;
+
+	uverbs_dev = container_of(class_dev, struct ib_uverbs_device, class_dev);
+
+	return sprintf(buf, "%s\n", uverbs_dev->ib_dev->name);
+}
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+/*
+ * Class device release callback: removes the character device, returns
+ * the device number to dev_map and frees the uverbs device.
+ */
+static void ib_uverbs_release_class_dev(struct class_device *class_dev)
+{
+	struct ib_uverbs_device *uverbs_dev;
+
+	uverbs_dev = container_of(class_dev, struct ib_uverbs_device, class_dev);
+
+	cdev_del(&uverbs_dev->dev);
+	clear_bit(uverbs_dev->devnum, dev_map);
+	kfree(uverbs_dev);
+}
+
+/*
+ * Device class for uverbs devices.  The release callback frees the
+ * ib_uverbs_device that embeds the class device.
+ */
+static struct class uverbs_class = {
+ .name = "infiniband_verbs",
+ .release = ib_uverbs_release_class_dev
+};
+
+/* Class attribute "abi_version": prints IB_USER_VERBS_ABI_VERSION. */
+static ssize_t show_abi_version(struct class *class, char *buf)
+{
+	int len = sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
+
+	return len;
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+/*
+ * IB client "add" callback: create the uverbs character device and
+ * class device for a newly registered IB device.
+ *
+ * Devices without an alloc_ucontext method are skipped.  Otherwise a
+ * free device number is claimed from dev_map, the cdev is added (with
+ * mmap support only if the device provides mmap), the class device is
+ * registered with its "dev" and "ibdev" attributes, and the uverbs
+ * device is stored as client data.  Failures are silent: the device is
+ * simply not exposed.
+ */
+static void ib_uverbs_add_one(struct ib_device *device)
+{
+	struct ib_uverbs_device *uverbs_dev;
+
+	if (!device->alloc_ucontext)
+		return;
+
+	uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL);
+	if (!uverbs_dev)
+		return;
+
+	memset(uverbs_dev, 0, sizeof *uverbs_dev);
+
+	spin_lock(&map_lock);
+	uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
+	if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
+		spin_unlock(&map_lock);
+		goto err;
+	}
+	set_bit(uverbs_dev->devnum, dev_map);
+	spin_unlock(&map_lock);
+
+	uverbs_dev->ib_dev   = device;
+	uverbs_dev->num_comp = 1;
+
+	if (device->mmap)
+		cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops);
+	else
+		cdev_init(&uverbs_dev->dev, &uverbs_fops);
+	uverbs_dev->dev.owner = THIS_MODULE;
+	kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum);
+	if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+		goto err_devnum;
+
+	uverbs_dev->class_dev.class = &uverbs_class;
+	uverbs_dev->class_dev.dev   = device->dma_device;
+	snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum);
+	if (class_device_register(&uverbs_dev->class_dev))
+		goto err_cdev;
+
+	if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_dev))
+		goto err_class;
+	if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev))
+		goto err_class;
+
+	ib_set_client_data(device, &uverbs_client, uverbs_dev);
+
+	return;
+
+err_class:
+	/*
+	 * Once the class device is registered, its release callback
+	 * (ib_uverbs_release_class_dev) does the cdev_del(), clear_bit()
+	 * and kfree().  Falling through to the labels below would repeat
+	 * all three on memory that may already be freed.
+	 */
+	class_device_unregister(&uverbs_dev->class_dev);
+	return;
+
+err_cdev:
+	cdev_del(&uverbs_dev->dev);
+
+err_devnum:
+	/* Return the device number claimed above. */
+	clear_bit(uverbs_dev->devnum, dev_map);
+
+err:
+	kfree(uverbs_dev);
+	return;
+}
+
+/*
+ * IB client "remove" callback: unregisters the class device of the
+ * uverbs device stored as client data, if there is one.
+ */
+static void ib_uverbs_remove_one(struct ib_device *device)
+{
+	struct ib_uverbs_device *uverbs_dev;
+
+	uverbs_dev = ib_get_client_data(device, &uverbs_client);
+	if (uverbs_dev)
+		class_device_unregister(&uverbs_dev->class_dev);
+}
+
+/*
+ * get_sb method of the event filesystem: returns a pseudo-filesystem
+ * superblock named "infinibandevent:" with the INFINIBANDEVENTFS_MAGIC
+ * magic number.
+ */
+static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
+{
+ return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
+ INFINIBANDEVENTFS_MAGIC);
+}
+
+/*
+ * Filesystem type backing the event files; registered and mounted
+ * internally in ib_uverbs_init().
+ */
+static struct file_system_type uverbs_event_fs = {
+ /* No owner field so module can be unloaded */
+ .name = "infinibandeventfs",
+ .get_sb = uverbs_event_get_sb,
+ .kill_sb = kill_litter_super
+};
+
+/*
+ * Module init: reserves the character device number range, registers
+ * the device class and its abi_version attribute, registers and mounts
+ * the event filesystem, and finally registers the IB client.
+ *
+ * Returns 0 on success.  On failure, everything set up so far is torn
+ * down in reverse order through the labels at the end of the function,
+ * and the error code of the failing step is returned.
+ */
+static int __init ib_uverbs_init(void)
+{
+ int ret;
+
+ spin_lock_init(&map_lock);
+
+ ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
+ "infiniband_verbs");
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register device number\n");
+ goto out;
+ }
+
+ ret = class_register(&uverbs_class);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
+ goto out_chrdev;
+ }
+
+ ret = class_create_file(&uverbs_class, &class_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
+ goto out_class;
+ }
+
+ ret = register_filesystem(&uverbs_event_fs);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
+ goto out_class;
+ }
+
+ uverbs_event_mnt = kern_mount(&uverbs_event_fs);
+ if (IS_ERR(uverbs_event_mnt)) {
+ ret = PTR_ERR(uverbs_event_mnt);
+ printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
+ goto out_fs;
+ }
+
+ /* Registered last, after everything the add callback uses is set up. */
+ ret = ib_register_client(&uverbs_client);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register client\n");
+ goto out_mnt;
+ }
+
+ return 0;
+
+out_mnt:
+ mntput(uverbs_event_mnt);
+
+out_fs:
+ unregister_filesystem(&uverbs_event_fs);
+
+out_class:
+ class_unregister(&uverbs_class);
+
+out_chrdev:
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+
+out:
+ return ret;
+}
+
+/*
+ * Module exit: undoes ib_uverbs_init() in reverse order, starting with
+ * the IB client and ending with the character device number range.
+ */
+static void __exit ib_uverbs_cleanup(void)
+{
+ ib_unregister_client(&uverbs_client);
+ mntput(uverbs_event_mnt);
+ unregister_filesystem(&uverbs_event_fs);
+ class_unregister(&uverbs_class);
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+}
+
+/* Module entry and exit points. */
+module_init(ib_uverbs_init);
+module_exit(ib_uverbs_cleanup);
More information about the general
mailing list