[ofa-general] [PATCH 6/8] core: Add XRC support for working with file descriptors

Jack Morgenstein jackm at dev.mellanox.co.il
Wed Jan 23 02:00:26 PST 2008


Add XRC support for working with file descriptors, to allow
sharing XRC domains between processes.

Changes: none

Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>

Index: infiniband/drivers/infiniband/core/uverbs_cmd.c
===================================================================
--- infiniband.orig/drivers/infiniband/core/uverbs_cmd.c	2008-01-22 20:16:45.000000000 +0200
+++ infiniband/drivers/infiniband/core/uverbs_cmd.c	2008-01-22 20:18:28.000000000 +0200
@@ -39,6 +39,7 @@
 #include <linux/fs.h>
 
 #include <asm/uaccess.h>
+#include <asm/fcntl.h>
 
 #include "uverbs.h"
 
@@ -256,14 +257,18 @@ static void put_srq_read(struct ib_srq *
 	put_uobj_read(srq->uobject);
 }
 
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context)
+static struct ib_xrcd *idr_read_xrcd(int xrcd_handle,
+				     struct ib_ucontext *context,
+				     struct ib_uobject **uobj)	/* out: uobject for the handle, or NULL */
 {
-	return idr_read_obj(&ib_uverbs_xrc_domain_idr, xrcd_handle, context, 0);
+	*uobj = idr_read_uobj(&ib_uverbs_xrc_domain_idr, xrcd_handle,
+			      context, 0);
+	return *uobj ? (*uobj)->object : NULL;	/* NULL when the handle lookup fails */
 }
 
-static void put_xrcd_read(struct ib_xrcd *xrcd)
+static void put_xrcd_read(struct ib_uobject *uobj)	/* uobj: as returned via idr_read_xrcd() */
 {
-	put_uobj_read(xrcd->uobject);
+	put_uobj_read(uobj);	/* xrcd->uobject is NULL for fd-backed domains, so it cannot be used */
 }
 
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -1040,6 +1045,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
 	struct ib_qp                   *qp;
 	struct ib_qp_init_attr          attr;
 	struct ib_xrcd	 	       *xrcd;
+	struct ib_uobject		*xrcd_uobj;
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -1062,7 +1068,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
 	srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ?
 		idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
 	xrcd = cmd.qp_type == IB_QPT_XRC ?
-		idr_read_xrcd(cmd.srq_handle, file->ucontext) : NULL;
+		idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : NULL;
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
 	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
 	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
@@ -1145,7 +1151,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
 	if (srq)
 		put_srq_read(srq);
 	if (xrcd)
-		put_xrcd_read(xrcd);
+		put_xrcd_read(xrcd_uobj);
 
 	mutex_lock(&file->mutex);
 	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@@ -1173,7 +1179,7 @@ err_put:
 	if (srq)
 		put_srq_read(srq);
 	if (xrcd)
-		put_xrcd_read(xrcd);
+		put_xrcd_read(xrcd_uobj);
 
 	put_uobj_write(&obj->uevent.uobject);
 	return ret;
@@ -2077,6 +2083,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct 
 	struct ib_cq			*xrc_cq;
 	struct ib_xrcd			*xrcd;
 	struct ib_srq_init_attr          attr;
+	struct ib_uobject		*xrcd_uobj;
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -2108,7 +2115,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct 
 		goto err_put_pd;
 	}
 
-	xrcd  = idr_read_xrcd(cmd.xrcd_handle, file->ucontext);
+	xrcd  = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj);
 	if (!xrcd) {
 		ret = -EINVAL;
 		goto err_put_cq;
@@ -2159,7 +2166,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct 
 		goto err_copy;
 	}
 
-	put_xrcd_read(xrcd);
+	put_xrcd_read(xrcd_uobj);
 	put_cq_read(xrc_cq);
 	put_pd_read(pd);
 
@@ -2180,7 +2187,7 @@ err_destroy:
 	ib_destroy_srq(srq);
 
 err_put:
-	put_xrcd_read(xrcd);
+	put_xrcd_read(xrcd_uobj);
 
 err_put_cq:
 	put_cq_read(xrc_cq);
@@ -2312,6 +2319,117 @@ ssize_t ib_uverbs_destroy_srq(struct ib_
 	return ret ? ret : in_len;
 }
 
+static struct inode * xrc_fd2inode(unsigned int fd)
+{
+	struct file * f = fget(fd);
+	/* NOTE(review): no matching fput() in this patch -- file ref leaked? */
+	if (!f)
+		return NULL;	/* fget() failed for this fd */
+	/* The inode is what callers use as the XRC domain table key. */
+	return f->f_dentry->d_inode;
+}
+
+struct xrcd_table_entry {
+	struct rb_node node;	/* link in ib_device.ib_uverbs_xrcd_table */
+	struct inode * inode;	/* search key, compared by pointer value */
+	struct ib_xrcd *xrcd;	/* XRC domain registered for that inode */
+};
+
+static int xrcd_table_insert(struct ib_device *dev,
+			     struct inode *i_n,
+			     struct ib_xrcd *xrcd)
+{
+	struct xrcd_table_entry *entry, *scan;
+	struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node;
+	struct rb_node *parent = NULL;
+	/* Add an i_n -> xrcd mapping to dev's tree; 0, -ENOMEM or -EEXIST. */
+	entry = kmalloc(sizeof(struct xrcd_table_entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->inode = i_n;
+	entry->xrcd = xrcd;
+	/* Walk down to the insertion slot; nodes are ordered by inode address. */
+	while (*p)
+	{
+		parent = *p;
+		scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+		if (i_n < scan->inode)
+			p = &(*p)->rb_left;
+		else if (i_n > scan->inode)
+			p = &(*p)->rb_right;
+		else {
+			kfree(entry);	/* i_n is already in the tree */
+			return -EEXIST;
+		}
+	}
+	/* Link the new entry at the slot found above, then rebalance. */
+	rb_link_node(&entry->node, parent, p);
+	rb_insert_color(&entry->node, &dev->ib_uverbs_xrcd_table);
+	return 0;
+}
+
+static int insert_xrcd(struct ib_device *dev, struct inode *i_n,
+		       struct ib_xrcd *xrcd)
+{
+	int ret;
+	/* Register i_n -> xrcd; on success also take an inode reference. */
+	ret = xrcd_table_insert(dev, i_n, xrcd);
+	if (!ret)
+		igrab(i_n);	/* dropped by iput() in xrcd_table_delete(); result unchecked */
+
+	return ret;
+}
+
+static struct xrcd_table_entry * xrcd_table_search(struct ib_device *dev,
+						   struct inode *i_n)
+{
+	struct xrcd_table_entry *scan;
+	struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node;
+	struct rb_node *parent = NULL;
+	/* Find i_n in dev's tree by pointer value; entry, or NULL if absent. */
+	while (*p)
+	{
+		parent = *p;
+		scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+		if (i_n < scan->inode)
+			p = &(*p)->rb_left;
+		else if (i_n > scan->inode)
+			p = &(*p)->rb_right;
+		else
+			return scan;
+	}
+	return NULL;
+}
+
+static int find_xrcd(struct ib_device *dev, struct inode *i_n,
+		     struct ib_xrcd **xrcd)
+{
+	struct xrcd_table_entry *entry;
+	/* Store the xrcd registered for i_n in *xrcd; 0, or -EINVAL if none. */
+	entry = xrcd_table_search(dev, i_n);
+	if (!entry)
+		return -EINVAL;	/* *xrcd is left untouched on this path */
+
+	*xrcd = entry->xrcd;
+	return 0;
+}
+
+/* Remove the table entry for i_n, if any, and drop its inode reference. */
+static void xrcd_table_delete(struct ib_device *dev,
+			      struct inode *i_n)
+{
+	struct xrcd_table_entry *entry = xrcd_table_search(dev, i_n);
+
+	if (entry) {
+		iput(i_n);	/* undoes the igrab() done in insert_xrcd() */
+		rb_erase(&entry->node, &dev->ib_uverbs_xrcd_table);
+		kfree(entry);
+	}
+}
+
 ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file,
 				  const char __user *buf, int in_len,
 				  int out_len)
@@ -2320,8 +2438,10 @@ ssize_t ib_uverbs_open_xrc_domain(struct
 	struct ib_uverbs_open_xrc_domain_resp resp;
 	struct ib_udata			 udata;
 	struct ib_uobject             	*uobj;
-	struct ib_xrcd			*xrcd;
-	int				 ret;
+	struct ib_xrcd			*xrcd = NULL;
+	struct inode			*inode = NULL;
+	int				 ret = 0;
+	int				 new_xrcd = 0;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -2329,35 +2449,55 @@ ssize_t ib_uverbs_open_xrc_domain(struct
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	/* file descriptors/inodes not yet implemented */
-	if (cmd.fd != (u32) (-1))
-		return -ENOSYS;
-
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
+	mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+	if (cmd.fd != (u32) (-1)) {
+		/* map the fd to its inode, the key of the xrcd table */
+		inode = xrc_fd2inode(cmd.fd);
+		if (!inode) {
+			ret = -EBADF;
+			goto err_table_mutex_unlock;
+		}
+
+		ret = find_xrcd(file->device->ib_dev, inode, &xrcd);
+		if (ret && !(cmd.oflags & O_CREAT)) {
+			/* no xrcd for this inode yet; O_CREAT is needed */
+			ret = -EAGAIN;
+			goto err_table_mutex_unlock;
+        	}
+
+        	if (xrcd && cmd.oflags & O_EXCL){
+			ret = -EINVAL;
+			goto err_table_mutex_unlock;
+		}
+	}
+
 	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
-	if (!uobj)
-		return -ENOMEM;
+	if (!uobj) {
+		ret = -ENOMEM;
+		goto err_table_mutex_unlock;
+	}
 
 	init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
 	down_write(&uobj->mutex);
 
-
-	xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
-						file->ucontext, &udata);
-	if (IS_ERR(xrcd)) {
-		ret = PTR_ERR(xrcd);
-		goto err;
+	if (!xrcd) {
+		xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+							file->ucontext, &udata);
+		if (IS_ERR(xrcd)) {
+			ret = PTR_ERR(xrcd);
+			goto err;
+		}
+		xrcd->uobject = (cmd.fd == -1) ? uobj : NULL;
+		xrcd->inode = inode;
+		xrcd->device  = file->device->ib_dev;
+		atomic_set(&xrcd->usecnt, 0);
+		new_xrcd = 1;
 	}
 
-	xrcd->fd = cmd.fd;
-	xrcd->flags = cmd.oflags;
-	xrcd->uobject = uobj;
-	xrcd->device  = file->device->ib_dev;
-	atomic_set(&xrcd->usecnt, 0);
-
 	uobj->object = xrcd;
 	ret = idr_add_uobj(&ib_uverbs_xrc_domain_idr, uobj);
 	if (ret)
@@ -2366,6 +2506,16 @@ ssize_t ib_uverbs_open_xrc_domain(struct
 	memset(&resp, 0, sizeof resp);
 	resp.xrcd_handle = uobj->id;
 
+	if (inode) {
+		if (new_xrcd) {
+		/* create new inode/xrcd table entry */
+			ret = insert_xrcd(file->device->ib_dev, inode, xrcd);
+			if (ret)
+				goto err_insert_xrcd;
+		}
+		atomic_inc(&xrcd->usecnt);
+	}
+
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
@@ -2380,16 +2530,29 @@ ssize_t ib_uverbs_open_xrc_domain(struct
 
 	up_write(&uobj->mutex);
 
+	mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
 	return in_len;
 
 err_copy:
-	idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+
+	if (inode) {
+		if (new_xrcd)
+			xrcd_table_delete(file->device->ib_dev, inode);
+		atomic_dec(&xrcd->usecnt);
+	}
+
+err_insert_xrcd:
+	idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
 
 err_idr:
 	ib_dealloc_xrcd(xrcd);
 
 err:
 	put_uobj_write(uobj);
+
+err_table_mutex_unlock:
+
+	mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
 	return ret;
 }
 
@@ -2399,14 +2562,25 @@ ssize_t ib_uverbs_close_xrc_domain(struc
 {
 	struct ib_uverbs_close_xrc_domain cmd;
 	struct ib_uobject          *uobj;
-	int                         ret;
+	struct ib_xrcd             *xrcd = NULL;
+	struct inode		   *inode = NULL;
+	int                         ret = 0;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
 	uobj = idr_write_uobj(&ib_uverbs_xrc_domain_idr, cmd.xrcd_handle, file->ucontext);
-	if (!uobj)
-		return -EINVAL;
+	if (!uobj) {
+		ret = -EINVAL;
+		goto err_unlock_mutex;
+	}
+
+	xrcd = (struct ib_xrcd *) (uobj->object);
+	inode = xrcd->inode;
+
+	if (inode)
+		atomic_dec(&xrcd->usecnt);
 
 	ret = ib_dealloc_xrcd(uobj->object);
 	if (!ret)
@@ -2414,8 +2588,11 @@ ssize_t ib_uverbs_close_xrc_domain(struc
 
 	put_uobj_write(uobj);
 
-	if (ret)
-		return ret;
+	if (ret && !inode)
+		goto err_unlock_mutex;
+
+	if (!ret && inode)
+		xrcd_table_delete(file->device->ib_dev, inode);
 
 	idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
 
@@ -2425,6 +2602,27 @@ ssize_t ib_uverbs_close_xrc_domain(struc
 
 	put_uobj(uobj);
 
+	mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
 	return in_len;
+
+err_unlock_mutex:
+	mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+	return ret;
 }
 
+void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
+				   struct ib_xrcd *xrcd)
+{
+	struct inode		   *inode = NULL;
+	int                         ret = 0;
+	/* Dealloc xrcd; for inode-backed domains also fix usecnt and the table. */
+	inode = xrcd->inode;
+	if (inode)
+		atomic_dec(&xrcd->usecnt);	/* undo the atomic_inc() done at open */
+	/* The table entry is removed only when the dealloc succeeds. */
+	ret = ib_dealloc_xrcd(xrcd);
+	if (!ret && inode)
+		xrcd_table_delete(ib_dev, inode);
+}
+
+
Index: infiniband/include/rdma/ib_verbs.h
===================================================================
--- infiniband.orig/include/rdma/ib_verbs.h	2008-01-22 20:16:45.000000000 +0200
+++ infiniband/include/rdma/ib_verbs.h	2008-01-22 20:18:28.000000000 +0200
@@ -52,6 +52,8 @@
 
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
 
 union ib_gid {
 	u8	raw[16];
@@ -751,11 +753,8 @@ struct ib_pd {
 struct ib_xrcd {
 	struct ib_device       *device;
 	struct ib_uobject      *uobject;
-	struct rb_node	        node;
-	u32			xrc_domain_num;
 	struct inode	       *inode;
-	int			fd;
-	u32			flags;
+	struct rb_node	        node;	/* NOTE(review): table code links xrcd_table_entry.node -- still needed? */
 	atomic_t          	usecnt; /* count all resources */
 };
 
@@ -1075,6 +1074,8 @@ struct ib_device {
 	__be64			     node_guid;
 	u8                           node_type;
 	u8                           phys_port_cnt;
+	struct rb_root		     ib_uverbs_xrcd_table;
+	struct mutex		     xrcd_table_mutex;
 };
 
 struct ib_client {
Index: infiniband/drivers/infiniband/core/device.c
===================================================================
--- infiniband.orig/drivers/infiniband/core/device.c	2008-01-22 18:41:12.000000000 +0200
+++ infiniband/drivers/infiniband/core/device.c	2008-01-22 20:18:28.000000000 +0200
@@ -290,6 +290,8 @@ int ib_register_device(struct ib_device 
 	INIT_LIST_HEAD(&device->client_data_list);
 	spin_lock_init(&device->event_handler_lock);
 	spin_lock_init(&device->client_data_lock);
+	device->ib_uverbs_xrcd_table = RB_ROOT;
+	mutex_init(&device->xrcd_table_mutex);
 
 	ret = read_port_table_lengths(device);
 	if (ret) {
Index: infiniband/drivers/infiniband/core/uverbs_main.c
===================================================================
--- infiniband.orig/drivers/infiniband/core/uverbs_main.c	2008-01-22 20:16:45.000000000 +0200
+++ infiniband/drivers/infiniband/core/uverbs_main.c	2008-01-22 20:18:28.000000000 +0200
@@ -251,13 +251,15 @@ static int ib_uverbs_cleanup_ucontext(st
 		kfree(uobj);
 	}
 
+	mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
 	list_for_each_entry_safe(uobj, tmp, &context->xrc_domain_list, list) {
 		struct ib_xrcd *xrcd = uobj->object;
 
 		idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
-		ib_dealloc_xrcd(xrcd);
+		ib_uverbs_dealloc_xrcd(file->device->ib_dev, xrcd);
 		kfree(uobj);
 	}
+	mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
 
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
 		struct ib_pd *pd = uobj->object;
Index: infiniband/drivers/infiniband/core/uverbs.h
===================================================================
--- infiniband.orig/drivers/infiniband/core/uverbs.h	2008-01-22 20:16:45.000000000 +0200
+++ infiniband/drivers/infiniband/core/uverbs.h	2008-01-22 20:18:28.000000000 +0200
@@ -163,6 +163,8 @@ void ib_uverbs_qp_event_handler(struct i
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
+void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
+			    struct ib_xrcd *xrcd);
 
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\



More information about the general mailing list