[openib-general] [PATCH] libibverbs handling for stale CQ events (was: ibv_get_async_event)

Roland Dreier rolandd at cisco.com
Tue Sep 6 08:56:55 PDT 2005


Here's the libibverbs part of MST's idea for stale CQ event handling.

--- libibverbs/include/infiniband/verbs.h	(revision 3296)
+++ libibverbs/include/infiniband/verbs.h	(working copy)
@@ -503,6 +503,7 @@ struct ibv_cq {
 	pthread_mutex_t		mutex;
 	pthread_cond_t		cond;
 	uint32_t		events_completed;
+	int			need_dead_event;
 };
 
 struct ibv_ah {
@@ -545,6 +546,7 @@ struct ibv_context_ops {
 	int			(*req_notify_cq)(struct ibv_cq *cq, int solicited);
 	void			(*cq_event)(struct ibv_cq *cq);
 	int			(*destroy_cq)(struct ibv_cq *cq);
+	int			(*free_cq)(struct ibv_cq *cq);
 	struct ibv_srq *	(*create_srq)(struct ibv_pd *pd,
 					      struct ibv_srq_init_attr *srq_init_attr);
 	int			(*modify_srq)(struct ibv_srq *srq,
@@ -677,14 +679,32 @@ extern struct ibv_cq *ibv_create_cq(stru
 
 /**
  * ibv_destroy_cq - Destroy a completion queue
+ * @cq: CQ to destroy.
+ *
+ * If ibv_req_notify_cq() has ever been called for @cq, then the
+ * CQ structure will not be freed until ibv_get_cq_event() has
+ * returned a "dead CQ" event (ie is_dead != 0) for this CQ.
+ *
+ * Calling ibv_destroy_cq() from one thread while another thread calls
+ * ibv_req_notify_cq() for the same CQ will lead to unpredictable
+ * results.  Don't do that.
  */
 extern int ibv_destroy_cq(struct ibv_cq *cq);
 
 /**
  * ibv_get_cq_event - Read next CQ event
+ * @context: Context to get CQ event for
+ * @comp_num: Index of completion event to check.  Must be >= 0 and
+ *   <= context->num_comp.
+ * @cq: Used to return pointer to CQ.
+ * @cq_context: Used to return consumer-supplied CQ context.
+ * @is_dead: If non-zero, then the event indicates that the CQ has
+ *   been destroyed and no more completion events will be generated
+ *   for the CQ.
  */
 extern int ibv_get_cq_event(struct ibv_context *context, int comp_num,
-			    struct ibv_cq **cq, void **cq_context);
+			    struct ibv_cq **cq, void **cq_context,
+			    int *is_dead);
  
 
 /**
@@ -710,6 +730,7 @@ static inline int ibv_poll_cq(struct ibv
  */
 static inline int ibv_req_notify_cq(struct ibv_cq *cq, int solicited)
 {
+	cq->need_dead_event = 1;
 	return cq->context->ops.req_notify_cq(cq, solicited);
 }
 
--- libibverbs/include/infiniband/kern-abi.h	(revision 3296)
+++ libibverbs/include/infiniband/kern-abi.h	(working copy)
@@ -105,8 +105,14 @@ struct ibv_kern_async_event {
 	__u32 reserved;
 };
 
+struct ibv_comp_event_v1 {
+	__u64 cq_handle;
+};
+
 struct ibv_comp_event {
 	__u64 cq_handle;
+	__u32 is_dead;
+	__u32 reserved;
 };
 
 /*
@@ -332,6 +338,7 @@ struct ibv_destroy_cq {
 	__u16 out_words;
 	__u64 response;
 	__u32 cq_handle;
+	__u32 dead_event;
 };
 
 struct ibv_destroy_cq_resp {
--- libibverbs/src/verbs.c	(revision 3296)
+++ libibverbs/src/verbs.c	(working copy)
@@ -42,6 +42,17 @@
 
 #include "ibverbs.h"
 
+/*
+ * Keep a dead CQ list in userspace to simulate the dead CQ completion
+ * event introduced with ABI version 2.  This is slightly racy but it
+ * at least allows us to keep the same library API.
+ */
+static pthread_mutex_t cq_dead_mutex = PTHREAD_MUTEX_INITIALIZER;
+static struct ibv_dead_cq {
+	struct ibv_cq		*cq;
+	struct ibv_dead_cq	*next;
+} *cq_dead_list = NULL;
+
 int ibv_query_device(struct ibv_context *context,
 		     struct ibv_device_attr *device_attr)
 {
@@ -110,6 +121,7 @@ struct ibv_cq *ibv_create_cq(struct ibv_
 		cq->context    	     = context;
 		cq->cq_context 	     = cq_context;
 		cq->events_completed = 0;
+		cq->need_dead_event  = 0;
 		pthread_mutex_init(&cq->mutex, NULL);
 		pthread_cond_init(&cq->cond, NULL);
 	}
@@ -119,25 +131,89 @@ struct ibv_cq *ibv_create_cq(struct ibv_
 
 int ibv_destroy_cq(struct ibv_cq *cq)
 {
-	return cq->context->ops.destroy_cq(cq);
+	struct ibv_dead_cq *dead;
+	int need_dead_event = cq->need_dead_event;
+	int ret;
+
+	ret = cq->context->ops.destroy_cq(cq);
+
+	if (abi_ver == 1 && !ret && need_dead_event) {
+		pthread_mutex_lock(&cq_dead_mutex);
+		dead = malloc(sizeof *dead);
+		if (dead) {
+			dead->cq     = cq;
+			dead->next   = cq_dead_list;
+			cq_dead_list = dead;
+		}
+		pthread_mutex_unlock(&cq_dead_mutex);
+	}
+
+	return ret;
 }
 
+int ibv_get_cq_event_v1(struct ibv_context *context, int comp_num,
+			struct ibv_cq **cq, void **cq_context)
+{
+	struct ibv_dead_cq *dead;
+	struct ibv_comp_event_v1 ev;
+
+	/*
+	 * Check if there are any synthetic "dead CQ" events before
+	 * trying to read our file descriptor.  There are some race
+	 * windows here -- for example, we might enter this function
+	 * to early to see a dead CQ event and then block forever if
+	 * no real CQ events are generated later.
+	 */
+
+	pthread_mutex_lock(&cq_dead_mutex);
+	if (cq_dead_list != NULL) {
+		*cq          = cq_dead_list->cq;
+		*cq_context  = (*cq)->cq_context;
+		dead         = cq_dead_list;
+		cq_dead_list = dead->next;
+		free(dead);
+		pthread_mutex_unlock(&cq_dead_mutex);
+
+		(*cq)->context->ops.free_cq(cq);
+		return 0;
+	}
+	pthread_mutex_unlock(&cq_dead_mutex);
+
+	if (read(context->cq_fd[comp_num], &ev, sizeof ev) != sizeof ev)
+		return -1;
+
+	*cq         = (struct ibv_cq *) (uintptr_t) ev.cq_handle;
+	*cq_context = (*cq)->cq_context;
+
+	if ((*cq)->context->ops.cq_event)
+		(*cq)->context->ops.cq_event(*cq);
+
+	return 0;
+}
 
 int ibv_get_cq_event(struct ibv_context *context, int comp_num,
-		     struct ibv_cq **cq, void **cq_context)
+		     struct ibv_cq **cq, void **cq_context, int *is_dead)
 {
 	struct ibv_comp_event ev;
 
 	if (comp_num < 0 || comp_num >= context->num_comp)
 		return -1;
 
+	if (abi_ver == 1) {
+		*is_dead = 0;
+		return ibv_get_cq_event_v1(context, comp_num, cq, cq_context);
+	}
+
 	if (read(context->cq_fd[comp_num], &ev, sizeof ev) != sizeof ev)
 		return -1;
 
 	*cq         = (struct ibv_cq *) (uintptr_t) ev.cq_handle;
 	*cq_context = (*cq)->cq_context;
+	*is_dead    = ev.is_dead;
 
-	if ((*cq)->context->ops.cq_event)
+	if (ev.is_dead)
+		(*cq)->context->ops.free_cq(*cq);
+	else if ((*cq)->context->ops.cq_event)
 		(*cq)->context->ops.cq_event(*cq);
 
 	return 0;
--- libibverbs/src/cmd.c	(revision 3296)
+++ libibverbs/src/cmd.c	(working copy)
@@ -297,7 +297,8 @@ int ibv_cmd_destroy_cq(struct ibv_cq *cq
 		return ibv_cmd_destroy_cq_v1(cq);
 
 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_CQ, &resp, sizeof resp);
-	cmd.cq_handle = cq->handle;
+	cmd.cq_handle  = cq->handle;
+	cmd.dead_event = cq->need_dead_event;
 
 	if (write(cq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
 		return errno;
--- libibverbs/ChangeLog	(revision 3296)
+++ libibverbs/ChangeLog	(working copy)
@@ -1,3 +1,13 @@
+2005-09-04  Roland Dreier  <roland at cisco.com>
+
+	* include/infiniband/verbs.h, include/infiniband/kern-abi.h,
+	src/verbs.c, src/cmd.c, examples/rc_pingpong.c,
+	examples/srq_pingpong.c, examples/uc_pingpong.c,
+	examples/ud_pingpong.c: Update to handle new kernel ABI for
+	avoiding stale completion events.  Change ibv_get_cq_event() to
+	return "dead CQ" events, and synthesize these events for old
+	kernels.
+
 2005-08-31  Roland Dreier  <roland at cisco.com>
 
 	* include/infiniband/kern-abi.h, include/infiniband/verbs.h,
--- libibverbs/examples/rc_pingpong.c	(revision 3296)
+++ libibverbs/examples/rc_pingpong.c	(working copy)
@@ -604,12 +604,18 @@ int main(int argc, char *argv[])
 		if (use_event) {
 			struct ibv_cq *ev_cq;
 			void          *ev_ctx;
+			int            dead;
 
-			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx, &dead)) {
 				fprintf(stderr, "Failed to get cq_event\n");
 				return 1;
 			}
 
+			if (dead) {
+				fprintf(stderr, "Unexpected CQ dead event\n");
+				return 1;
+			}
+
 			if (ev_cq != ctx->cq) {
 				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
 				return 1;
--- libibverbs/examples/srq_pingpong.c	(revision 3296)
+++ libibverbs/examples/srq_pingpong.c	(working copy)
@@ -677,12 +677,18 @@ int main(int argc, char *argv[])
 		if (use_event) {
 			struct ibv_cq *ev_cq;
 			void          *ev_ctx;
+			int            dead;
 
-			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx, &dead)) {
 				fprintf(stderr, "Failed to get cq_event\n");
 				return 1;
 			}
 
+			if (dead) {
+				fprintf(stderr, "Unexpected CQ dead event\n");
+				return 1;
+			}
+
 			if (ev_cq != ctx->cq) {
 				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
 				return 1;
--- libibverbs/examples/uc_pingpong.c	(revision 3296)
+++ libibverbs/examples/uc_pingpong.c	(working copy)
@@ -596,12 +596,18 @@ int main(int argc, char *argv[])
 		if (use_event) {
 			struct ibv_cq *ev_cq;
 			void          *ev_ctx;
+			int            dead;
 
-			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx, &dead)) {
 				fprintf(stderr, "Failed to get cq_event\n");
 				return 1;
 			}
 
+			if (dead) {
+				fprintf(stderr, "Unexpected CQ dead event\n");
+				return 1;
+			}
+
 			if (ev_cq != ctx->cq) {
 				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
 				return 1;
--- libibverbs/examples/ud_pingpong.c	(revision 3296)
+++ libibverbs/examples/ud_pingpong.c	(working copy)
@@ -600,12 +600,18 @@ int main(int argc, char *argv[])
 		if (use_event) {
 			struct ibv_cq *ev_cq;
 			void          *ev_ctx;
+			int            dead;
 
-			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx, &dead)) {
 				fprintf(stderr, "Failed to get cq_event\n");
 				return 1;
 			}
 
+			if (dead) {
+				fprintf(stderr, "Unexpected CQ dead event\n");
+				return 1;
+			}
+
 			if (ev_cq != ctx->cq) {
 				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
 				return 1;



More information about the general mailing list