[openib-general] Re: SPAM: [PATCH] [RFC] - dapl - dat_ep_free() can return without freeing the endpoint
James Lentini
jlentini at netapp.com
Tue Apr 4 13:57:01 PDT 2006
Comments below:
On Tue, 4 Apr 2006, Steve Wise wrote:
> Index: openib_cma/dapl_ib_cm.c
> ===================================================================
> --- openib_cma/dapl_ib_cm.c (revision 6107)
> +++ openib_cma/dapl_ib_cm.c (working copy)
> @@ -62,9 +62,9 @@
> /* local prototypes */
> static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn,
> struct rdma_cm_event *event);
> -static int dapli_cm_active_cb(struct dapl_cm_id *conn,
> +static void dapli_cm_active_cb(struct dapl_cm_id *conn,
> struct rdma_cm_event *event);
> -static int dapli_cm_passive_cb(struct dapl_cm_id *conn,
> +static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
> struct rdma_cm_event *event);
> static void dapli_addr_resolve(struct dapl_cm_id *conn);
> static void dapli_route_resolve(struct dapl_cm_id *conn);
> @@ -164,28 +164,34 @@
> void dapli_destroy_conn(struct dapl_cm_id *conn)
> {
> int in_callback;
> + struct rdma_cm_id *cm_id;
>
> dapl_dbg_log(DAPL_DBG_TYPE_CM,
> " destroy_conn: conn %p id %d\n",
> conn,conn->cm_id);
> -
> dapl_os_lock(&conn->lock);
> conn->destroy = 1;
> in_callback = conn->in_callback;
> - dapl_os_unlock(&conn->lock);
> -
> - if (!in_callback) {
> - if (conn->ep)
> - conn->ep->cm_handle = IB_INVALID_HANDLE;
> - if (conn->cm_id) {
> - if (conn->cm_id->qp)
> - rdma_destroy_qp(conn->cm_id);
> - rdma_destroy_id(conn->cm_id);
> + do {
> + if (in_callback) {
> + dapl_os_unlock(&conn->lock);
> + usleep(10);
> + dapl_os_lock(&conn->lock);
> }
> -
> - conn->cm_id = NULL;
> - dapl_os_free(conn, sizeof(*conn));
> + in_callback = conn->in_callback;
> + } while (in_callback);
> +
> + if (conn->ep)
> + conn->ep->cm_handle = IB_INVALID_HANDLE;
> + cm_id = conn->cm_id;
> + conn->cm_id = NULL;
> + dapl_os_unlock(&conn->lock);
> + if (cm_id) {
> + if (cm_id->qp)
> + rdma_destroy_qp(cm_id);
> + rdma_destroy_id(cm_id);
> }
> + dapl_os_free(conn, sizeof(*conn));
> }
This is an improvement. If dat_ep_free() returns success, DAPL
consumers expect to be able to free the free'd EP's
resources (PZ, etc.). We must have been getting lucky on IB.
I'm worried that a callback could occur ...
void dapli_destroy_conn(struct dapl_cm_id *conn)
{
int in_callback;
struct rdma_cm_id *cm_id;
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" destroy_conn: conn %p id %d\n",
conn,conn->cm_id);
dapl_os_lock(&conn->lock);
conn->destroy = 1;
in_callback = conn->in_callback;
do {
if (in_callback) {
dapl_os_unlock(&conn->lock);
usleep(10);
dapl_os_lock(&conn->lock);
}
in_callback = conn->in_callback;
} while (in_callback);
if (conn->ep)
conn->ep->cm_handle = IB_INVALID_HANDLE;
cm_id = conn->cm_id;
conn->cm_id = NULL;
dapl_os_unlock(&conn->lock);
/* ... here */
if (cm_id) {
if (cm_id->qp)
rdma_destroy_qp(cm_id);
rdma_destroy_id(cm_id);
}
dapl_os_free(conn, sizeof(*conn));
}
Destroying the cm_id while in a callback would be bad.
> static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn,
> @@ -243,11 +249,9 @@
> return new_conn;
> }
>
> -static int dapli_cm_active_cb(struct dapl_cm_id *conn,
> +static void dapli_cm_active_cb(struct dapl_cm_id *conn,
> struct rdma_cm_event *event)
> {
> - int destroy;
> -
> dapl_dbg_log(DAPL_DBG_TYPE_CM,
> " active_cb: conn %p id %d event %d\n",
> conn, conn->cm_id, event->event );
> @@ -255,7 +259,7 @@
> dapl_os_lock(&conn->lock);
> if (conn->destroy) {
> dapl_os_unlock(&conn->lock);
> - return 0;
> + return;
> }
> conn->in_callback = 1;
> dapl_os_unlock(&conn->lock);
> @@ -303,16 +307,14 @@
> }
>
> dapl_os_lock(&conn->lock);
> - destroy = conn->destroy;
> - conn->in_callback = conn->destroy;
> + conn->in_callback = 0;
> dapl_os_unlock(&conn->lock);
> - return(destroy);
> + return;
> }
>
> -static int dapli_cm_passive_cb(struct dapl_cm_id *conn,
> +static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
> struct rdma_cm_event *event)
> {
> - int destroy;
> struct dapl_cm_id *new_conn;
>
> dapl_dbg_log(DAPL_DBG_TYPE_CM,
> @@ -322,7 +324,7 @@
> dapl_os_lock(&conn->lock);
> if (conn->destroy) {
> dapl_os_unlock(&conn->lock);
> - return 0;
> + return;
> }
> conn->in_callback = 1;
> dapl_os_unlock(&conn->lock);
> @@ -377,10 +379,9 @@
> }
>
> dapl_os_lock(&conn->lock);
> - destroy = conn->destroy;
> - conn->in_callback = conn->destroy;
> + conn->in_callback = 0;
> dapl_os_unlock(&conn->lock);
> - return(destroy);
> + return;
> }
>
>
> @@ -1021,7 +1022,6 @@
> /* process one CM event, fairness */
> if(!ret) {
> struct dapl_cm_id *conn;
> - int ret;
>
> /* set proper conn from cm_id context*/
> if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
> @@ -1059,24 +1059,9 @@
> case RDMA_CM_EVENT_DISCONNECTED:
> /* passive or active */
> if (conn->sp)
> - ret = dapli_cm_passive_cb(conn,event);
> + dapli_cm_passive_cb(conn,event);
> else
> - ret = dapli_cm_active_cb(conn,event);
> -
> - /* destroy both qp and cm_id */
> - if (ret) {
> - dapl_dbg_log(DAPL_DBG_TYPE_CM,
> - " cma_cb: DESTROY conn %p"
> - " cm_id %p qp %p\n",
> - conn, conn->cm_id,
> - conn->cm_id->qp);
> -
> - if (conn->cm_id->qp)
> - rdma_destroy_qp(conn->cm_id);
> -
> - rdma_destroy_id(conn->cm_id);
> - dapl_os_free(conn, sizeof(*conn));
> - }
> + dapli_cm_active_cb(conn,event);
If this code is removed, we'll need to update functions that set
conn->destroy to 1 to destroy the cm_id.
What happens if a consumer attempts to free the EP from a callback?
With this change (or any one that blocked a callback thread from
attempting to free the EP), I believe we would deadlock.
Is it possible to destroy CM IDs from within a callback?
More information about the general
mailing list