[openib-general] [PATCH] RDMA/iwcm: Bugs in cm_conn_req_handler()

Krishna Kumar2 krkumar2 at in.ibm.com
Mon Feb 12 21:06:56 PST 2007


Hi Steve,

Thanks for the explanation. I reviewed the patch and had two
comments :

1. When the set_bit(CALLBACK_DESTROY) was done, the
    refcount could be such so that the free_cm_id is not called,
    resulting in cm_id having work entries. But there are two
    places where a BUG_ON(!list_empty(work_list)) was added
    (before doing a free_cm_id()), both under check for
    CALLBACK_DESTROY. Isn't it possible for these BUG_ON's
    to get hit ? This is an error case and may not hit in normal
    testing.

2.
> @@ -647,10 +650,11 @@ static void cm_conn_req_handler(struct i
>     /* Call the client CM handler */
>     ret = cm_id->cm_handler(cm_id, iw_event);
>     if (ret) {
> +      iw_cm_reject(cm_id, NULL, 0);
>        set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
>        destroy_cm_id(cm_id);
>        if (atomic_read(&cm_id_priv->refcount)==0)
> -         kfree(cm_id);
> +         free_cm_id(cm_id_priv);
>     }

Though this is not a bug, the code just above this calls
iw_destroy_cm_id() if alloc_work_entries() failed. Is it
possible for the provider to get a reference to this cm_id
during the failed call to the client CM handler ? I didn't
think so, which is why in my original patch I had simply
called iw_destroy_cm_id here.

I had read your explanation about the provider possibly
acquiring a reference, but in this place aren't we calling
iw_conn_req_handler() which in turn cannot go to the
device and cache a reference count ?

The rest of the patch looks great. I am going to test this
today and will post the results.

Thanks,

- KK

> On Sat, 2007-02-10 at 14:36 -0600, Steve Wise wrote:
> > ugh.
> >
> > There is at least one bug in this patch.  I cannot call iw_cm_reject()
> > inside destroy_cm_id() because both functions grab the iw_cm lock...
> >
> >
>
> This patch puts the iw_cm_reject() calls back in
> cm_conn_req_handler()...
>
>
> ---
>
> iw_cm_id destruction race condition fixes.
>
> From: Steve Wise <swise at opengridcomputing.com>
>
> Several changes:
>
> - iwcm_deref_id() always wakes up if there's another reference.
>
> - clean up race condition in cm_work_handler().
>
> - create static void free_cm_id() which deallocs the work entries and
then
>   kfrees the cm_id memory.  This reduces code replication.
>
> - rem_ref() if this is the last reference -and- the IWCM owns freeing the

>   cm_id, then free it.
>
> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
> Signed-off-by: Tom Tucker <tom at opengridcomputing.com>
> ---
>
>  drivers/infiniband/core/iwcm.c |   47
> +++++++++++++++++++++-------------------
>  1 files changed, 25 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/infiniband/core/iwcm.c
b/drivers/infiniband/core/iwcm.c
> index 1039ad5..891d1fa 100644
> --- a/drivers/infiniband/core/iwcm.c
> +++ b/drivers/infiniband/core/iwcm.c
> @@ -146,6 +146,12 @@ static int copy_private_data(struct iw_c
>     return 0;
>  }
>
> +static void free_cm_id(struct iwcm_id_private *cm_id_priv)
> +{
> +   dealloc_work_entries(cm_id_priv);
> +   kfree(cm_id_priv);
> +}
> +
>  /*
>   * Release a reference on cm_id. If the last reference is being
>   * released, enable the waiting thread (in iw_destroy_cm_id) to
> @@ -153,21 +159,14 @@ static int copy_private_data(struct iw_c
>   */
>  static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
>  {
> -   int ret = 0;
> -
>     BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
>     if (atomic_dec_and_test(&cm_id_priv->refcount)) {
>        BUG_ON(!list_empty(&cm_id_priv->work_list));
> -      if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
> -         BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
> -         BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
> -               &cm_id_priv->flags));
> -         ret = 1;
> -      }
>        complete(&cm_id_priv->destroy_comp);
> +      return 1;
>     }
>
> -   return ret;
> +   return 0;
>  }
>
>  static void add_ref(struct iw_cm_id *cm_id)
> @@ -181,7 +180,11 @@ static void rem_ref(struct iw_cm_id *cm_
>  {
>     struct iwcm_id_private *cm_id_priv;
>     cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> -   iwcm_deref_id(cm_id_priv);
> +   if (iwcm_deref_id(cm_id_priv) &&
> +       test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
> +      BUG_ON(!list_empty(&cm_id_priv->work_list));
> +      free_cm_id(cm_id_priv);
> +   }
>  }
>
>  static int cm_event_handler(struct iw_cm_id *cm_id, struct
> iw_cm_event *event);
> @@ -355,7 +358,9 @@ static void destroy_cm_id(struct iw_cm_i
>     case IW_CM_STATE_CONN_RECV:
>        /*
>         * App called destroy before/without calling accept after
> -       * receiving connection request event notification.
> +       * receiving connection request event notification or
> +       * returned non zero from the event callback function.
> +       * In either case, must tell the provider to reject.
>         */
>        cm_id_priv->state = IW_CM_STATE_DESTROYING;
>        break;
> @@ -391,9 +396,7 @@ void iw_destroy_cm_id(struct iw_cm_id *c
>
>     wait_for_completion(&cm_id_priv->destroy_comp);
>
> -   dealloc_work_entries(cm_id_priv);
> -
> -   kfree(cm_id_priv);
> +   free_cm_id(cm_id_priv);
>  }
>  EXPORT_SYMBOL(iw_destroy_cm_id);
>
> @@ -647,10 +650,11 @@ static void cm_conn_req_handler(struct i
>     /* Call the client CM handler */
>     ret = cm_id->cm_handler(cm_id, iw_event);
>     if (ret) {
> +      iw_cm_reject(cm_id, NULL, 0);
>        set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
>        destroy_cm_id(cm_id);
>        if (atomic_read(&cm_id_priv->refcount)==0)
> -         kfree(cm_id);
> +         free_cm_id(cm_id_priv);
>     }
>
>  out:
> @@ -854,13 +858,12 @@ static void cm_work_handler(struct work_
>           destroy_cm_id(&cm_id_priv->id);
>        }
>        BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
> -      if (iwcm_deref_id(cm_id_priv))
> -         return;
> -
> -      if (atomic_read(&cm_id_priv->refcount)==0 &&
> -          test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
> -         dealloc_work_entries(cm_id_priv);
> -         kfree(cm_id_priv);
> +      if (iwcm_deref_id(cm_id_priv)) {
> +         if (test_bit(IWCM_F_CALLBACK_DESTROY,
> +                 &cm_id_priv->flags)) {
> +            BUG_ON(!list_empty(&cm_id_priv->work_list));
> +            free_cm_id(cm_id_priv);
> +         }
>           return;
>        }
>        spin_lock_irqsave(&cm_id_priv->lock, flags);
>
>
>
> _______________________________________________
> openib-general mailing list
> openib-general at openib.org
> http://openib.org/mailman/listinfo/openib-general
>
> To unsubscribe, please visit
http://openib.org/mailman/listinfo/openib-general
>





More information about the general mailing list