[openib-general] [PATCH] libibverbs handling for stale CQ events (was: ibv_get_async_event)
Roland Dreier
rolandd at cisco.com
Tue Sep 6 08:56:55 PDT 2005
Here's the libibverbs part of MST's idea for stale CQ event handling.
--- libibverbs/include/infiniband/verbs.h (revision 3296)
+++ libibverbs/include/infiniband/verbs.h (working copy)
@@ -503,6 +503,7 @@ struct ibv_cq {
pthread_mutex_t mutex;
pthread_cond_t cond;
uint32_t events_completed;
+ int need_dead_event;
};
struct ibv_ah {
@@ -545,6 +546,7 @@ struct ibv_context_ops {
int (*req_notify_cq)(struct ibv_cq *cq, int solicited);
void (*cq_event)(struct ibv_cq *cq);
int (*destroy_cq)(struct ibv_cq *cq);
+ int (*free_cq)(struct ibv_cq *cq);
struct ibv_srq * (*create_srq)(struct ibv_pd *pd,
struct ibv_srq_init_attr *srq_init_attr);
int (*modify_srq)(struct ibv_srq *srq,
@@ -677,14 +679,32 @@ extern struct ibv_cq *ibv_create_cq(stru
/**
* ibv_destroy_cq - Destroy a completion queue
+ * @cq: CQ to destroy.
+ *
+ * If ibv_req_notify_cq() has ever been called for @cq, then the
+ * CQ structure will not be freed until ibv_get_cq_event() has
+ * returned a "dead CQ" event (ie is_dead != 0) for this CQ.
+ *
+ * Calling ibv_destroy_cq() from one thread while another thread calls
+ * ibv_req_notify_cq() for the same CQ will lead to unpredictable
+ * results. Don't do that.
*/
extern int ibv_destroy_cq(struct ibv_cq *cq);
/**
* ibv_get_cq_event - Read next CQ event
+ * @context: Context to get CQ event for
+ * @comp_num: Index of completion event to check. Must be >= 0 and
+ * <= context->num_comp.
+ * @cq: Used to return pointer to CQ.
+ * @cq_context: Used to return consumer-supplied CQ context.
+ * @is_dead: If non-zero, then the event indicates that the CQ has
+ * been destroyed and no more completion events will be generated
+ * for the CQ.
*/
extern int ibv_get_cq_event(struct ibv_context *context, int comp_num,
- struct ibv_cq **cq, void **cq_context);
+ struct ibv_cq **cq, void **cq_context,
+ int *is_dead);
/**
@@ -710,6 +730,7 @@ static inline int ibv_poll_cq(struct ibv
*/
static inline int ibv_req_notify_cq(struct ibv_cq *cq, int solicited)
{
+ cq->need_dead_event = 1;
return cq->context->ops.req_notify_cq(cq, solicited);
}
--- libibverbs/include/infiniband/kern-abi.h (revision 3296)
+++ libibverbs/include/infiniband/kern-abi.h (working copy)
@@ -105,8 +105,14 @@ struct ibv_kern_async_event {
__u32 reserved;
};
+struct ibv_comp_event_v1 {
+ __u64 cq_handle;
+};
+
struct ibv_comp_event {
__u64 cq_handle;
+ __u32 is_dead;
+ __u32 reserved;
};
/*
@@ -332,6 +338,7 @@ struct ibv_destroy_cq {
__u16 out_words;
__u64 response;
__u32 cq_handle;
+ __u32 dead_event;
};
struct ibv_destroy_cq_resp {
--- libibverbs/src/verbs.c (revision 3296)
+++ libibverbs/src/verbs.c (working copy)
@@ -42,6 +42,17 @@
#include "ibverbs.h"
+/*
+ * Keep a dead CQ list in userspace to simulate the dead CQ completion
+ * event introduced with ABI version 2. This is slightly racy but it
+ * at least allows us to keep the same library API.
+ */
+static pthread_mutex_t cq_dead_mutex = PTHREAD_MUTEX_INITIALIZER;
+static struct ibv_dead_cq {
+ struct ibv_cq *cq;
+ struct ibv_dead_cq *next;
+} *cq_dead_list = NULL;
+
int ibv_query_device(struct ibv_context *context,
struct ibv_device_attr *device_attr)
{
@@ -110,6 +121,7 @@ struct ibv_cq *ibv_create_cq(struct ibv_
cq->context = context;
cq->cq_context = cq_context;
cq->events_completed = 0;
+ cq->need_dead_event = 0;
pthread_mutex_init(&cq->mutex, NULL);
pthread_cond_init(&cq->cond, NULL);
}
@@ -119,25 +131,89 @@ struct ibv_cq *ibv_create_cq(struct ibv_
int ibv_destroy_cq(struct ibv_cq *cq)
{
- return cq->context->ops.destroy_cq(cq);
+ struct ibv_dead_cq *dead;
+ int need_dead_event = cq->need_dead_event;
+ int ret;
+
+ ret = cq->context->ops.destroy_cq(cq);
+
+ if (abi_ver == 1 && !ret && need_dead_event) {
+ pthread_mutex_lock(&cq_dead_mutex);
+ dead = malloc(sizeof *dead);
+ if (dead) {
+ dead->cq = cq;
+ dead->next = cq_dead_list;
+ cq_dead_list = dead;
+ }
+ pthread_mutex_unlock(&cq_dead_mutex);
+ }
+
+ return ret;
}
+int ibv_get_cq_event_v1(struct ibv_context *context, int comp_num,
+ struct ibv_cq **cq, void **cq_context)
+{
+ struct ibv_dead_cq *dead;
+ struct ibv_comp_event_v1 ev;
+
+ /*
+ * Check if there are any synthetic "dead CQ" events before
+ * trying to read our file descriptor. There are some race
+ * windows here -- for example, we might enter this function
+ * to early to see a dead CQ event and then block forever if
+ * no real CQ events are generated later.
+ */
+
+ pthread_mutex_lock(&cq_dead_mutex);
+ if (cq_dead_list != NULL) {
+ *cq = cq_dead_list->cq;
+ *cq_context = (*cq)->cq_context;
+ dead = cq_dead_list;
+ cq_dead_list = dead->next;
+ free(dead);
+ pthread_mutex_unlock(&cq_dead_mutex);
+
+ (*cq)->context->ops.free_cq(cq);
+ return 0;
+ }
+ pthread_mutex_unlock(&cq_dead_mutex);
+
+ if (read(context->cq_fd[comp_num], &ev, sizeof ev) != sizeof ev)
+ return -1;
+
+ *cq = (struct ibv_cq *) (uintptr_t) ev.cq_handle;
+ *cq_context = (*cq)->cq_context;
+
+ if ((*cq)->context->ops.cq_event)
+ (*cq)->context->ops.cq_event(*cq);
+
+ return 0;
+}
int ibv_get_cq_event(struct ibv_context *context, int comp_num,
- struct ibv_cq **cq, void **cq_context)
+ struct ibv_cq **cq, void **cq_context, int *is_dead)
{
struct ibv_comp_event ev;
if (comp_num < 0 || comp_num >= context->num_comp)
return -1;
+ if (abi_ver == 1) {
+ *is_dead = 0;
+ return ibv_get_cq_event_v1(context, comp_num, cq, cq_context);
+ }
+
if (read(context->cq_fd[comp_num], &ev, sizeof ev) != sizeof ev)
return -1;
*cq = (struct ibv_cq *) (uintptr_t) ev.cq_handle;
*cq_context = (*cq)->cq_context;
+ *is_dead = ev.is_dead;
- if ((*cq)->context->ops.cq_event)
+ if (ev.is_dead)
+ (*cq)->context->ops.free_cq(*cq);
+ else if ((*cq)->context->ops.cq_event)
(*cq)->context->ops.cq_event(*cq);
return 0;
--- libibverbs/src/cmd.c (revision 3296)
+++ libibverbs/src/cmd.c (working copy)
@@ -297,7 +297,8 @@ int ibv_cmd_destroy_cq(struct ibv_cq *cq
return ibv_cmd_destroy_cq_v1(cq);
IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_CQ, &resp, sizeof resp);
- cmd.cq_handle = cq->handle;
+ cmd.cq_handle = cq->handle;
+ cmd.dead_event = cq->need_dead_event;
if (write(cq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
return errno;
--- libibverbs/ChangeLog (revision 3296)
+++ libibverbs/ChangeLog (working copy)
@@ -1,3 +1,13 @@
+2005-09-04 Roland Dreier <roland at cisco.com>
+
+ * include/infiniband/verbs.h, include/infiniband/kern-abi.h,
+ src/verbs.c, src/cmd.c, examples/rc_pingpong.c,
+ examples/srq_pingpong.c, examples/uc_pingpong.c,
+ examples/ud_pingpong.c: Update to handle new kernel ABI for
+ avoiding stale completion events. Change ibv_get_cq_event() to
+ return "dead CQ" events, and synthesize these events for old
+ kernels.
+
2005-08-31 Roland Dreier <roland at cisco.com>
* include/infiniband/kern-abi.h, include/infiniband/verbs.h,
--- libibverbs/examples/rc_pingpong.c (revision 3296)
+++ libibverbs/examples/rc_pingpong.c (working copy)
@@ -604,12 +604,18 @@ int main(int argc, char *argv[])
if (use_event) {
struct ibv_cq *ev_cq;
void *ev_ctx;
+ int dead;
- if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+ if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx, &dead)) {
fprintf(stderr, "Failed to get cq_event\n");
return 1;
}
+ if (dead) {
+ fprintf(stderr, "Unexpected CQ dead event\n");
+ return 1;
+ }
+
if (ev_cq != ctx->cq) {
fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
return 1;
--- libibverbs/examples/srq_pingpong.c (revision 3296)
+++ libibverbs/examples/srq_pingpong.c (working copy)
@@ -677,12 +677,18 @@ int main(int argc, char *argv[])
if (use_event) {
struct ibv_cq *ev_cq;
void *ev_ctx;
+ int dead;
- if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+ if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx, &dead)) {
fprintf(stderr, "Failed to get cq_event\n");
return 1;
}
+ if (dead) {
+ fprintf(stderr, "Unexpected CQ dead event\n");
+ return 1;
+ }
+
if (ev_cq != ctx->cq) {
fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
return 1;
--- libibverbs/examples/uc_pingpong.c (revision 3296)
+++ libibverbs/examples/uc_pingpong.c (working copy)
@@ -596,12 +596,18 @@ int main(int argc, char *argv[])
if (use_event) {
struct ibv_cq *ev_cq;
void *ev_ctx;
+ int dead;
- if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+ if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx, &dead)) {
fprintf(stderr, "Failed to get cq_event\n");
return 1;
}
+ if (dead) {
+ fprintf(stderr, "Unexpected CQ dead event\n");
+ return 1;
+ }
+
if (ev_cq != ctx->cq) {
fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
return 1;
--- libibverbs/examples/ud_pingpong.c (revision 3296)
+++ libibverbs/examples/ud_pingpong.c (working copy)
@@ -600,12 +600,18 @@ int main(int argc, char *argv[])
if (use_event) {
struct ibv_cq *ev_cq;
void *ev_ctx;
+ int dead;
- if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+ if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx, &dead)) {
fprintf(stderr, "Failed to get cq_event\n");
return 1;
}
+ if (dead) {
+ fprintf(stderr, "Unexpected CQ dead event\n");
+ return 1;
+ }
+
if (ev_cq != ctx->cq) {
fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
return 1;
More information about the general
mailing list