[openib-general] [PATCH] Kernel stale CQ event handling (was: ibv_get_async_event)

Roland Dreier rolandd@cisco.com
Tue Sep 6 08:56:54 PDT 2005


This is the kernel side of MST's idea for stale CQ event handling.
When a CQ is destroyed, we sweep away (free) any completion events
still queued for that CQ and, if userspace asks for it by setting the
new dead_event flag in the destroy command, queue a single "dead CQ"
completion event so that userspace can tell the CQ is gone.
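
For context, here is a rough userspace sketch (not part of this patch) of
how a reader of the CQ completion event file might consume the new event
layout.  The struct mirrors the ib_uverbs_comp_event_desc change below;
comp_fd, forget_cq() and handle_completion() are made-up names used only
for illustration.

#include <stdint.h>
#include <unistd.h>

/* Mirrors the updated ib_uverbs_comp_event_desc from the patch below. */
struct comp_event_desc {
	uint64_t cq_handle;
	uint32_t is_dead;	/* nonzero: the CQ has been destroyed */
	uint32_t reserved;
};

static void forget_cq(uint64_t cq_handle)
{
	/* hypothetical: drop any userspace state tied to this CQ handle */
	(void) cq_handle;
}

static void handle_completion(uint64_t cq_handle)
{
	/* hypothetical: dispatch a normal completion for this CQ handle */
	(void) cq_handle;
}

static int read_one_comp_event(int comp_fd)
{
	struct comp_event_desc desc;

	if (read(comp_fd, &desc, sizeof desc) != (ssize_t) sizeof desc)
		return -1;

	if (desc.is_dead)
		forget_cq(desc.cq_handle);	/* stale CQ: no further events will arrive */
	else
		handle_completion(desc.cq_handle);

	return 0;
}

Once a reader sees is_dead for a handle it can safely forget that CQ;
the kernel has already freed any completion events it swept.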

--- infiniband/core/uverbs_cmd.c	(revision 3319)
+++ infiniband/core/uverbs_cmd.c	(working copy)
@@ -590,7 +590,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uv
 	struct ib_uverbs_create_cq      cmd;
 	struct ib_uverbs_create_cq_resp resp;
 	struct ib_udata                 udata;
-	struct ib_uevent_object        *uobj;
+	struct ib_ucq_object           *uobj;
 	struct ib_cq                   *cq;
 	int                             ret;
 
@@ -614,6 +614,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uv
 	uobj->uobject.user_handle = cmd.user_handle;
 	uobj->uobject.context     = file->ucontext;
 	uobj->events_reported     = 0;
+	INIT_LIST_HEAD(&uobj->comp_list);
 	INIT_LIST_HEAD(&uobj->event_list);
 
 	cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
@@ -685,8 +686,9 @@ ssize_t ib_uverbs_destroy_cq(struct ib_u
 	struct ib_uverbs_destroy_cq      cmd;
 	struct ib_uverbs_destroy_cq_resp resp;
 	struct ib_cq               	*cq;
-	struct ib_uevent_object        	*uobj;
-	struct ib_uverbs_async_event	*evt, *tmp;
+	struct ib_ucq_object        	*uobj;
+	struct ib_uverbs_event		*evt, *tmp, *dead_evt;
+	u64				 user_handle;
 	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -700,7 +702,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_u
 	if (!cq || cq->uobject->context != file->ucontext)
 		goto out;
 
-	uobj = container_of(cq->uobject, struct ib_uevent_object, uobject);
+	user_handle = cq->uobject->user_handle;
+	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
 
 	ret = ib_destroy_cq(cq);
 	if (ret)
@@ -712,6 +715,31 @@ ssize_t ib_uverbs_destroy_cq(struct ib_u
 	list_del(&uobj->uobject.list);
 	spin_unlock_irq(&file->ucontext->lock);
 
+	if (cmd.dead_event) {
+		dead_evt = kmalloc(sizeof *dead_evt, GFP_KERNEL);
+		if (dead_evt) {
+			dead_evt->desc.comp.cq_handle = user_handle;
+			dead_evt->desc.comp.is_dead   = 1;
+			dead_evt->desc.comp.reserved  = 0;
+		}
+	} else
+		dead_evt = NULL;
+
+	spin_lock_irq(&file->comp_file[0].lock);
+
+	list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
+		list_del(&evt->list);
+		kfree(evt);
+	}
+
+	if (dead_evt) {
+		list_add_tail(&dead_evt->list, &file->comp_file[0].event_list);
+		wake_up_interruptible(&file->comp_file[0].poll_wait);
+		kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN);
+	}
+
+	spin_unlock_irq(&file->comp_file[0].lock);
+
 	spin_lock_irq(&file->async_file.lock);
 	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
 		list_del(&evt->list);
@@ -955,7 +983,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_u
 	struct ib_uverbs_destroy_qp_resp resp;
 	struct ib_qp               	*qp;
 	struct ib_uevent_object        	*uobj;
-	struct ib_uverbs_async_event	*evt, *tmp;
+	struct ib_uverbs_event		*evt, *tmp;
 	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1193,7 +1221,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_
 	struct ib_uverbs_destroy_srq_resp resp;
 	struct ib_srq               	 *srq;
 	struct ib_uevent_object        	 *uobj;
-	struct ib_uverbs_async_event	*evt, *tmp;
+	struct ib_uverbs_event		 *evt, *tmp;
 	int                         	  ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
--- infiniband/core/uverbs.h	(revision 3319)
+++ infiniband/core/uverbs.h	(working copy)
@@ -76,21 +76,26 @@ struct ib_uverbs_file {
 	struct ib_uverbs_event_file	        comp_file[1];
 };
 
-struct ib_uverbs_async_event {
-	struct ib_uverbs_async_event_desc	desc;
+struct ib_uverbs_event {
+	union {
+		struct ib_uverbs_async_event_desc	async;
+		struct ib_uverbs_comp_event_desc	comp;
+	}					desc;
 	struct list_head			list;
 	struct list_head			obj_list;
 	u32				       *counter;
 };
 
-struct ib_uverbs_comp_event {
-	struct ib_uverbs_comp_event_desc	desc;
-	struct list_head			list;
+struct ib_uevent_object {
+	struct ib_uobject	uobject;
+	struct list_head	event_list;
+	u32			events_reported;
 };
 
-struct ib_uevent_object {
+struct ib_ucq_object {
 	struct ib_uobject	uobject;
 	struct list_head	event_list;
+	struct list_head	comp_list;
 	u32			events_reported;
 };
 
--- infiniband/core/uverbs_main.c	(revision 3319)
+++ infiniband/core/uverbs_main.c	(working copy)
@@ -128,7 +128,7 @@ static int ib_dealloc_ucontext(struct ib
 		idr_remove(&ib_uverbs_cq_idr, uobj->id);
 		ib_destroy_cq(cq);
 		list_del(&uobj->list);
-		kfree(container_of(uobj, struct ib_uevent_object, uobject));
+		kfree(container_of(uobj, struct ib_ucq_object, uobject));
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
@@ -182,9 +182,8 @@ static ssize_t ib_uverbs_event_read(stru
 				    size_t count, loff_t *pos)
 {
 	struct ib_uverbs_event_file *file = filp->private_data;
-	struct ib_uverbs_async_event *async_evt = NULL;
+	struct ib_uverbs_event *event;
 	u32 *counter = NULL;
-	void *event;
 	int eventsz;
 	int ret = 0;
 
@@ -209,19 +208,17 @@ static ssize_t ib_uverbs_event_read(stru
 		return -ENODEV;
 	}
 
+	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
+
 	if (file->is_async) {
-		async_evt = list_entry(file->event_list.next,
-				       struct ib_uverbs_async_event, list);
-		event     = async_evt;
-		eventsz   = sizeof *async_evt;
-		counter   = async_evt->counter;
+		eventsz = sizeof (struct ib_uverbs_async_event_desc);
+		counter = event->counter;
 
 		if (counter)
 			++*counter;
 	} else {
-		event   = list_entry(file->event_list.next,
-				     struct ib_uverbs_comp_event, list);
 		eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+		counter = NULL;
 	}
 
 	if (eventsz > count) {
@@ -229,8 +226,8 @@ static ssize_t ib_uverbs_event_read(stru
 		event = NULL;
 	} else {
 		list_del(file->event_list.next);
-		if (counter)
-			list_del(&async_evt->obj_list);
+		if (counter || (!file->is_async && !event->desc.comp.is_dead))
+			list_del(&event->obj_list);
 	}
 
 	spin_unlock_irq(&file->lock);
@@ -267,16 +264,13 @@ static unsigned int ib_uverbs_event_poll
 
 static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
 {
-	struct list_head *entry, *tmp;
+	struct ib_uverbs_event *entry, *tmp;
 
 	spin_lock_irq(&file->lock);
 	if (file->fd != -1) {
 		file->fd = -1;
-		list_for_each_safe(entry, tmp, &file->event_list)
-			if (file->is_async)
-				kfree(list_entry(entry, struct ib_uverbs_async_event, list));
-			else
-				kfree(list_entry(entry, struct ib_uverbs_comp_event, list));
+		list_for_each_entry_safe(entry, tmp, &file->event_list, list)
+			kfree(entry);
 	}
 	spin_unlock_irq(&file->lock);
 }
@@ -314,18 +308,24 @@ static struct file_operations uverbs_eve
 
 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
 {
-	struct ib_uverbs_file       *file = cq_context;
-	struct ib_uverbs_comp_event *entry;
-	unsigned long                flags;
+	struct ib_uverbs_file  *file = cq_context;
+	struct ib_ucq_object *uobj;
+	struct ib_uverbs_event *entry;
+	unsigned long           flags;
 
 	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
 	if (!entry)
 		return;
 
-	entry->desc.cq_handle = cq->uobject->user_handle;
+	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+
+	entry->desc.comp.cq_handle = cq->uobject->user_handle;
+	entry->desc.comp.is_dead   = 0;
+	entry->desc.comp.reserved  = 0;
 
 	spin_lock_irqsave(&file->comp_file[0].lock, flags);
 	list_add_tail(&entry->list, &file->comp_file[0].event_list);
+	list_add_tail(&entry->obj_list, &uobj->comp_list);
 	spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
 
 	wake_up_interruptible(&file->comp_file[0].poll_wait);
@@ -337,16 +337,16 @@ static void ib_uverbs_async_handler(stru
 				    struct list_head *obj_list,
 				    u32 *counter)
 {
-	struct ib_uverbs_async_event *entry;
+	struct ib_uverbs_event *entry;
 	unsigned long flags;
 
 	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
 	if (!entry)
 		return;
 
-	entry->desc.element    = element;
-	entry->desc.event_type = event;
-	entry->counter         = counter;
+	entry->desc.async.element    = element;
+	entry->desc.async.event_type = event;
+	entry->counter               = counter;
 
 	spin_lock_irqsave(&file->async_file.lock, flags);
 	list_add_tail(&entry->list, &file->async_file.event_list);
@@ -360,10 +360,10 @@ static void ib_uverbs_async_handler(stru
 
 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
 {
-	struct ib_uevent_object *uobj;
+	struct ib_ucq_object *uobj;
 
 	uobj = container_of(event->element.cq->uobject,
-			    struct ib_uevent_object, uobject);
+			    struct ib_ucq_object, uobject);
 
 	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
 				event->event, &uobj->event_list,
--- infiniband/include/rdma/ib_user_verbs.h	(revision 3319)
+++ infiniband/include/rdma/ib_user_verbs.h	(working copy)
@@ -102,6 +102,8 @@ struct ib_uverbs_async_event_desc {
 
 struct ib_uverbs_comp_event_desc {
 	__u64 cq_handle;
+	__u32 is_dead;
+	__u32 reserved;
 };
 
 /*
@@ -294,6 +296,7 @@ struct ib_uverbs_create_cq_resp {
 struct ib_uverbs_destroy_cq {
 	__u64 response;
 	__u32 cq_handle;
+	__u32 dead_event;
 };
 
 struct ib_uverbs_destroy_cq_resp {


