[openib-general] Re: [PATCH] [RFC] - dapl - dat_ep_free() can return without freeing the endpoint
Steve Wise
swise at opengridcomputing.com
Tue Apr 4 15:30:19 PDT 2006
> resources (PZ, etc.). We must have been getting lucky on IB.
>
> I'm worried that a callback could occur ...
>
> void dapli_destroy_conn(struct dapl_cm_id *conn)
> {
> int in_callback;
> struct rdma_cm_id *cm_id;
>
> dapl_dbg_log(DAPL_DBG_TYPE_CM,
> " destroy_conn: conn %p id %d\n",
> conn,conn->cm_id);
>
> dapl_os_lock(&conn->lock);
> conn->destroy = 1;
> in_callback = conn->in_callback;
> do {
> if (in_callback) {
> dapl_os_unlock(&conn->lock);
> usleep(10);
> dapl_os_lock(&conn->lock);
> }
> in_callback = conn->in_callback;
> } while (in_callback);
>
> if (conn->ep)
> conn->ep->cm_handle = IB_INVALID_HANDLE;
> cm_id = conn->cm_id;
> conn->cm_id = NULL;
> dapl_os_unlock(&conn->lock);
>
> /* ... here */
>
> if (cm_id) {
> if (cm_id->qp)
> rdma_destroy_qp(cm_id);
> rdma_destroy_id(cm_id);
> }
> dapl_os_free(conn, sizeof(*conn));
> }
>
> Destroying the cm_id while in a callback would be bad.
>
> > static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn,
> > @@ -243,11 +249,9 @@
> > return new_conn;
> > }
> >
> > -static int dapli_cm_active_cb(struct dapl_cm_id *conn,
> > +static void dapli_cm_active_cb(struct dapl_cm_id *conn,
> > struct rdma_cm_event *event)
> > {
> > - int destroy;
> > -
> > dapl_dbg_log(DAPL_DBG_TYPE_CM,
> > " active_cb: conn %p id %d event %d\n",
> > conn, conn->cm_id, event->event );
> > @@ -255,7 +259,7 @@
> > dapl_os_lock(&conn->lock);
> > if (conn->destroy) {
> > dapl_os_unlock(&conn->lock);
> > - return 0;
> > + return;
> > }
> > conn->in_callback = 1;
> > dapl_os_unlock(&conn->lock);
> > @@ -303,16 +307,14 @@
> > }
> >
> > dapl_os_lock(&conn->lock);
> > - destroy = conn->destroy;
> > - conn->in_callback = conn->destroy;
> > + conn->in_callback = 0;
> > dapl_os_unlock(&conn->lock);
> > - return(destroy);
> > + return;
> > }
> >
> > -static int dapli_cm_passive_cb(struct dapl_cm_id *conn,
> > +static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
> > struct rdma_cm_event *event)
> > {
> > - int destroy;
> > struct dapl_cm_id *new_conn;
> >
> > dapl_dbg_log(DAPL_DBG_TYPE_CM,
> > @@ -322,7 +324,7 @@
> > dapl_os_lock(&conn->lock);
> > if (conn->destroy) {
> > dapl_os_unlock(&conn->lock);
> > - return 0;
> > + return;
> > }
> > conn->in_callback = 1;
> > dapl_os_unlock(&conn->lock);
> > @@ -377,10 +379,9 @@
> > }
> >
> > dapl_os_lock(&conn->lock);
> > - destroy = conn->destroy;
> > - conn->in_callback = conn->destroy;
> > + conn->in_callback = 0;
> > dapl_os_unlock(&conn->lock);
> > - return(destroy);
> > + return;
> > }
> >
> >
> > @@ -1021,7 +1022,6 @@
> > /* process one CM event, fairness */
> > if(!ret) {
> > struct dapl_cm_id *conn;
> > - int ret;
> >
> > /* set proper conn from cm_id context*/
> > if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
> > @@ -1059,24 +1059,9 @@
> > case RDMA_CM_EVENT_DISCONNECTED:
> > /* passive or active */
> > if (conn->sp)
> > - ret = dapli_cm_passive_cb(conn,event);
> > + dapli_cm_passive_cb(conn,event);
> > else
> > - ret = dapli_cm_active_cb(conn,event);
> > -
> > - /* destroy both qp and cm_id */
> > - if (ret) {
> > - dapl_dbg_log(DAPL_DBG_TYPE_CM,
> > - " cma_cb: DESTROY conn %p"
> > - " cm_id %p qp %p\n",
> > - conn, conn->cm_id,
> > - conn->cm_id->qp);
> > -
> > - if (conn->cm_id->qp)
> > - rdma_destroy_qp(conn->cm_id);
> > -
> > - rdma_destroy_id(conn->cm_id);
> > - dapl_os_free(conn, sizeof(*conn));
> > - }
> > + dapli_cm_active_cb(conn,event);
>
> If this code is removed, we'll need to update functions that set
> conn->destroy to 1 to destroy the cm_id.
>
I thought only dapli_destroy_conn() sets this, but now I see that
dapli_thread() can also set this so I'll go check this out.
> What happens if a consumer attempts to free the EP from a callback?
There are no direct consumer callbacks in usermode are there? consumers
call dat_evd_wait() or whatever and get scheduled. Not like kernel
mode... Or am I confused?
> With this change (or any one that blocked a callback thread from
> attempting to free the EP), I believe we would deadlock.
>
> Is it possible to destroy CM IDs from within a callback?
I'll go verify this again, but I don't think the callback dapl thread
does this.
Stevo.
More information about the general
mailing list