[ofa-general] [PATCH 4 of 5] core: XRC implementation -- add support for working with file descriptors
Jack Morgenstein
jackm at dev.mellanox.co.il
Tue Sep 18 10:25:27 PDT 2007
Add XRC support for working with file descriptors, to allow
sharing XRC domains between processes.
Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>
Index: ofed_kernel/drivers/infiniband/core/uverbs_cmd.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/core/uverbs_cmd.c 2007-09-16 16:32:22.844587000 +0200
+++ ofed_kernel/drivers/infiniband/core/uverbs_cmd.c 2007-09-18 11:09:20.590991000 +0200
@@ -39,6 +39,7 @@
#include <linux/fs.h>
#include <asm/uaccess.h>
+#include <asm/fcntl.h>
#include "uverbs.h"
@@ -252,14 +253,18 @@ static void put_srq_read(struct ib_srq *
put_uobj_read(srq->uobject);
}
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context)
+static struct ib_xrcd *idr_read_xrcd(int xrcd_handle,
+ struct ib_ucontext *context,
+ struct ib_uobject **uobj) /* out: uobject returned by the idr lookup; may be NULL */
{
- return idr_read_obj(&ib_uverbs_xrc_domain_idr, xrcd_handle, context, 0);
+ *uobj = idr_read_uobj(&ib_uverbs_xrc_domain_idr, xrcd_handle,
+ context, 0);
+ return *uobj ? (*uobj)->object : NULL; /* callers in this patch release with put_xrcd_read(*uobj) */
}
-static void put_xrcd_read(struct ib_xrcd *xrcd)
+static void put_xrcd_read(struct ib_uobject *uobj) /* takes the uobject directly: xrcd->uobject is set to NULL for fd-backed domains */
{
- put_uobj_read(xrcd->uobject);
+ put_uobj_read(uobj);
}
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -1036,6 +1041,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
struct ib_qp *qp;
struct ib_qp_init_attr attr;
struct ib_xrcd *xrcd;
+ struct ib_uobject *xrcd_uobj;
int ret;
if (out_len < sizeof resp)
@@ -1058,7 +1064,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ?
idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
xrcd = (cmd.is_srq && cmd.qp_type == IB_QPT_XRC) ?
- idr_read_xrcd(cmd.srq_handle, file->ucontext) : NULL;
+ idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : NULL;
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
@@ -1139,7 +1145,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
if (srq)
put_srq_read(srq);
if (xrcd)
- put_xrcd_read(xrcd);
+ put_xrcd_read(xrcd_uobj);
mutex_lock(&file->mutex);
list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@@ -1167,7 +1173,7 @@ err_put:
if (srq)
put_srq_read(srq);
if (xrcd)
- put_xrcd_read(xrcd);
+ put_xrcd_read(xrcd_uobj);
put_uobj_write(&obj->uevent.uobject);
return ret;
@@ -2071,6 +2077,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct
struct ib_cq *xrc_cq;
struct ib_xrcd *xrcd;
struct ib_srq_init_attr attr;
+ struct ib_uobject *xrcd_uobj;
int ret;
if (out_len < sizeof resp)
@@ -2102,7 +2109,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct
goto err_put_pd;
}
- xrcd = idr_read_xrcd(cmd.xrcd_handle, file->ucontext);
+ xrcd = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj);
if (!xrcd) {
ret = -EINVAL;
goto err_put_cq;
@@ -2153,7 +2160,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct
goto err_copy;
}
- put_xrcd_read(xrcd);
+ put_xrcd_read(xrcd_uobj);
put_cq_read(xrc_cq);
put_pd_read(pd);
@@ -2174,7 +2181,7 @@ err_destroy:
ib_destroy_srq(srq);
err_put:
- put_xrcd_read(xrcd);
+ put_xrcd_read(xrcd_uobj);
err_put_cq:
put_cq_read(xrc_cq);
@@ -2306,6 +2313,117 @@ ssize_t ib_uverbs_destroy_srq(struct ib_
return ret ? ret : in_len;
}
+/*
+ * Translate a file descriptor of the calling process into the inode
+ * behind it.  Returns NULL when fget() cannot resolve the descriptor.
+ *
+ * NOTE(review): the reference taken by fget() is not released with
+ * fput() anywhere in this function -- confirm whether a caller is meant
+ * to drop it; otherwise every successful call pins the file.
+ */
+static struct inode *xrc_fd2inode(unsigned int fd)
+{
+	struct file *filp = fget(fd);
+
+	return filp ? filp->f_dentry->d_inode : NULL;
+}
+
+struct xrcd_table_entry { /* one node of the per-device inode -> XRC domain rb-tree */
+ struct rb_node node; /* linkage into ib_device.ib_uverbs_xrcd_table */
+ struct inode * inode; /* search key; entries are ordered by this pointer value */
+ struct ib_xrcd *xrcd; /* XRC domain associated with the inode */
+};
+
+/*
+ * Add an inode -> xrcd mapping to the device's XRC domain rb-tree.
+ *
+ * The tree is ordered by the inode pointer value.  Returns 0 on success,
+ * -ENOMEM if the entry cannot be allocated, or -EEXIST if the inode is
+ * already present (the freshly allocated entry is freed in that case).
+ * No inode reference is taken here; insert_xrcd() does that.
+ */
+static int xrcd_table_insert(struct ib_device *dev,
+			     struct inode *i_n,
+			     struct ib_xrcd *xrcd)
+{
+	struct rb_node **link = &dev->ib_uverbs_xrcd_table.rb_node;
+	struct rb_node *parent = NULL;
+	struct xrcd_table_entry *new_entry;
+
+	new_entry = kmalloc(sizeof *new_entry, GFP_KERNEL);
+	if (!new_entry)
+		return -ENOMEM;
+
+	new_entry->inode = i_n;
+	new_entry->xrcd = xrcd;
+
+	/* descend to the empty slot where the new node belongs */
+	while (*link) {
+		struct xrcd_table_entry *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct xrcd_table_entry, node);
+
+		if (i_n == cur->inode) {
+			kfree(new_entry);
+			return -EEXIST;
+		}
+
+		if (i_n < cur->inode)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	rb_link_node(&new_entry->node, parent, link);
+	rb_insert_color(&new_entry->node, &dev->ib_uverbs_xrcd_table);
+	return 0;
+}
+
+/*
+ * Register an inode -> xrcd mapping and, on success, take a reference
+ * on the inode with igrab().  xrcd_table_delete() calls iput() on the
+ * inode when the entry is removed.
+ *
+ * Returns 0 on success or the error from xrcd_table_insert().
+ *
+ * NOTE(review): the return value of igrab() is not checked.
+ */
+static int insert_xrcd(struct ib_device *dev, struct inode *i_n,
+		       struct ib_xrcd *xrcd)
+{
+	int err = xrcd_table_insert(dev, i_n, xrcd);
+
+	if (err)
+		return err;
+
+	igrab(i_n);
+	return 0;
+}
+
+/*
+ * Look up the table entry keyed by the given inode in the device's
+ * XRC domain rb-tree.  Returns the entry, or NULL if the inode is not
+ * in the tree.
+ */
+static struct xrcd_table_entry *xrcd_table_search(struct ib_device *dev,
+						  struct inode *i_n)
+{
+	struct rb_node *cursor = dev->ib_uverbs_xrcd_table.rb_node;
+
+	while (cursor) {
+		struct xrcd_table_entry *candidate =
+			rb_entry(cursor, struct xrcd_table_entry, node);
+
+		if (i_n == candidate->inode)
+			return candidate;
+
+		/* keys are compared as raw pointer values */
+		if (i_n < candidate->inode)
+			cursor = cursor->rb_left;
+		else
+			cursor = cursor->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Fetch the XRC domain registered for an inode.
+ *
+ * On success stores the domain in *xrcd and returns 0.  Returns -EINVAL
+ * and leaves *xrcd untouched when the inode has no table entry.
+ */
+static int find_xrcd(struct ib_device *dev, struct inode *i_n,
+		     struct ib_xrcd **xrcd)
+{
+	struct xrcd_table_entry *hit = xrcd_table_search(dev, i_n);
+
+	if (hit) {
+		*xrcd = hit->xrcd;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+
+/*
+ * Remove the table entry for an inode, if one exists: call iput() on
+ * the inode (insert_xrcd() calls igrab() when it adds an entry), unlink
+ * the node from the rb-tree and free it.  Does nothing when the inode
+ * is not in the table.
+ */
+static void xrcd_table_delete(struct ib_device *dev,
+			      struct inode *i_n)
+{
+	struct xrcd_table_entry *victim = xrcd_table_search(dev, i_n);
+
+	if (!victim)
+		return;
+
+	iput(i_n);
+	rb_erase(&victim->node, &dev->ib_uverbs_xrcd_table);
+	kfree(victim);
+}
+
ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -2314,8 +2432,10 @@ ssize_t ib_uverbs_open_xrc_domain(struct
struct ib_uverbs_open_xrc_domain_resp resp;
struct ib_udata udata;
struct ib_uobject *uobj;
- struct ib_xrcd *xrcd;
- int ret;
+ struct ib_xrcd *xrcd = NULL;
+ struct inode *inode = NULL;
+ int ret = 0;
+ int new_xrcd = 0;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -2323,35 +2443,55 @@ ssize_t ib_uverbs_open_xrc_domain(struct
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- /* file descriptors/inodes not yet implemented */
- if (cmd.fd != (u32) (-1))
- return -ENOSYS;
-
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
+ mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+ if (cmd.fd != (u32) (-1)) {
+ /* search for file descriptor */
+ inode = xrc_fd2inode(cmd.fd);
+ if (!inode) {
+ ret = -EBADF;
+ goto err_table_mutex_unlock;
+ }
+
+ ret = find_xrcd(file->device->ib_dev, inode, &xrcd);
+ if (ret && !(cmd.oflags & O_CREAT)) {
+ /* no XRC domain is registered for this inode; O_CREAT is required to create one */
+ ret = -EAGAIN;
+ goto err_table_mutex_unlock;
+ }
+
+ if (xrcd && cmd.oflags & O_EXCL){
+ ret = -EINVAL;
+ goto err_table_mutex_unlock;
+ }
+ }
+
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ if (!uobj) {
+ ret = -ENOMEM;
+ goto err_table_mutex_unlock;
+ }
init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
down_write(&uobj->mutex);
-
- xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
- file->ucontext, &udata);
- if (IS_ERR(xrcd)) {
- ret = PTR_ERR(xrcd);
- goto err;
+ if (!xrcd) {
+ xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+ file->ucontext, &udata);
+ if (IS_ERR(xrcd)) {
+ ret = PTR_ERR(xrcd);
+ goto err;
+ }
+ xrcd->uobject = (cmd.fd == -1) ? uobj : NULL;
+ xrcd->inode = inode;
+ xrcd->device = file->device->ib_dev;
+ atomic_set(&xrcd->usecnt, 0);
+ new_xrcd = 1;
}
- xrcd->fd = cmd.fd;
- xrcd->flags = cmd.oflags;
- xrcd->uobject = uobj;
- xrcd->device = file->device->ib_dev;
- atomic_set(&xrcd->usecnt, 0);
-
uobj->object = xrcd;
ret = idr_add_uobj(&ib_uverbs_xrc_domain_idr, uobj);
if (ret)
@@ -2360,6 +2500,16 @@ ssize_t ib_uverbs_open_xrc_domain(struct
memset(&resp, 0, sizeof resp);
resp.xrcd_handle = uobj->id;
+ if (inode) {
+ if (new_xrcd) {
+ /* create new inode/xrcd table entry */
+ ret = insert_xrcd(file->device->ib_dev, inode, xrcd);
+ if (ret)
+ goto err_insert_xrcd;
+ }
+ atomic_inc(&xrcd->usecnt);
+ }
+
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
@@ -2374,16 +2524,29 @@ ssize_t ib_uverbs_open_xrc_domain(struct
up_write(&uobj->mutex);
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+
+ if (inode) {
+ if (new_xrcd)
+ xrcd_table_delete(file->device->ib_dev, inode);
+ atomic_dec(&xrcd->usecnt);
+ }
+
+err_insert_xrcd:
+ idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
err_idr:
ib_dealloc_xrcd(xrcd);
err:
put_uobj_write(uobj);
+
+err_table_mutex_unlock:
+
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
return ret;
}
@@ -2393,14 +2556,25 @@ ssize_t ib_uverbs_close_xrc_domain(struc
{
struct ib_uverbs_close_xrc_domain cmd;
struct ib_uobject *uobj;
- int ret;
+ struct ib_xrcd *xrcd = NULL;
+ struct inode *inode = NULL;
+ int ret = 0;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
+ mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
uobj = idr_write_uobj(&ib_uverbs_xrc_domain_idr, cmd.xrcd_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ if (!uobj) {
+ ret = -EINVAL;
+ goto err_unlock_mutex;
+ }
+
+ xrcd = (struct ib_xrcd *) (uobj->object);
+ inode = xrcd->inode;
+
+ if (inode)
+ atomic_dec(&xrcd->usecnt);
ret = ib_dealloc_xrcd(uobj->object);
if (!ret)
@@ -2408,8 +2582,11 @@ ssize_t ib_uverbs_close_xrc_domain(struc
put_uobj_write(uobj);
- if (ret)
- return ret;
+ if (ret && !inode)
+ goto err_unlock_mutex;
+
+ if (!ret && inode)
+ xrcd_table_delete(file->device->ib_dev, inode);
idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
@@ -2419,6 +2596,27 @@ ssize_t ib_uverbs_close_xrc_domain(struc
put_uobj(uobj);
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
return in_len;
+
+err_unlock_mutex:
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ return ret;
}
+/*
+ * Release one user of an XRC domain.
+ *
+ * For a domain that has an inode attached, usecnt is decremented before
+ * ib_dealloc_xrcd() is called.  The inode's table entry is removed only
+ * when ib_dealloc_xrcd() returns 0.  The inode pointer is read up front
+ * and xrcd is not touched after the dealloc call.
+ *
+ * The caller in ib_uverbs_cleanup_ucontext() holds xrcd_table_mutex
+ * around this call.
+ */
+void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
+			    struct ib_xrcd *xrcd)
+{
+	struct inode *shared_inode = xrcd->inode;
+
+	if (shared_inode)
+		atomic_dec(&xrcd->usecnt);
+
+	if (ib_dealloc_xrcd(xrcd))
+		return;
+
+	if (shared_inode)
+		xrcd_table_delete(ib_dev, shared_inode);
+}
+
+
Index: ofed_kernel/include/rdma/ib_verbs.h
===================================================================
--- ofed_kernel.orig/include/rdma/ib_verbs.h 2007-09-16 16:32:43.674747000 +0200
+++ ofed_kernel/include/rdma/ib_verbs.h 2007-09-17 12:31:55.239267000 +0200
@@ -52,6 +52,8 @@
#include <asm/atomic.h>
#include <asm/scatterlist.h>
#include <asm/uaccess.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
union ib_gid {
u8 raw[16];
@@ -758,11 +760,8 @@ struct ib_pd {
struct ib_xrcd {
struct ib_device *device;
struct ib_uobject *uobject;
- struct rb_node node;
- u32 xrc_domain_num;
struct inode *inode;
- int fd;
- u32 flags;
+ struct rb_node node;
atomic_t usecnt; /* count all resources */
};
@@ -1084,6 +1083,8 @@ struct ib_device {
__be64 node_guid;
u8 node_type;
u8 phys_port_cnt;
+ struct rb_root ib_uverbs_xrcd_table;
+ struct mutex xrcd_table_mutex;
};
struct ib_client {
Index: ofed_kernel/drivers/infiniband/core/device.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/core/device.c 2007-09-10 09:07:21.951463000 +0300
+++ ofed_kernel/drivers/infiniband/core/device.c 2007-09-17 15:06:09.213698000 +0200
@@ -290,6 +290,8 @@ int ib_register_device(struct ib_device
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
+ device->ib_uverbs_xrcd_table = RB_ROOT;
+ mutex_init(&device->xrcd_table_mutex);
ret = read_port_table_lengths(device);
if (ret) {
Index: ofed_kernel/drivers/infiniband/core/uverbs_main.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/core/uverbs_main.c 2007-09-16 16:32:22.000000000 +0200
+++ ofed_kernel/drivers/infiniband/core/uverbs_main.c 2007-09-17 15:17:28.552933000 +0200
@@ -243,13 +243,15 @@ static int ib_uverbs_cleanup_ucontext(st
kfree(uobj);
}
+ mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
list_for_each_entry_safe(uobj, tmp, &context->xrc_domain_list, list) {
struct ib_xrcd *xrcd = uobj->object;
idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
- ib_dealloc_xrcd(xrcd);
+ ib_uverbs_dealloc_xrcd(file->device->ib_dev, xrcd);
kfree(uobj);
}
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
struct ib_pd *pd = uobj->object;
Index: ofed_kernel/drivers/infiniband/core/uverbs.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/core/uverbs.h 2007-09-17 15:18:32.000000000 +0200
+++ ofed_kernel/drivers/infiniband/core/uverbs.h 2007-09-17 15:19:16.885160000 +0200
@@ -164,6 +164,8 @@ void ib_uverbs_qp_event_handler(struct i
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
+void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
+ struct ib_xrcd *xrcd);
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
More information about the general
mailing list