[ofw] ConnectX functionality is completely broken

Sean Hefty sean.hefty at intel.com
Thu Jul 31 12:28:03 PDT 2008


I don't understand the context value issue yet.  I did test IBAL on mthca with
IPoIB and userspace tests.  If the context were incorrect, I would have expected
to see an issue, and the context values into the winverbs driver were correct.
Can you provide a little more details on the problem?  I'd like to understand
where the issues are in the assumptions that were made in the 1435 patch.

 

 

Find attached a fix, that will maybe solve the problem.

Maybe, because i haven't time to check it and this work week (down here) is
ended.

I'll check the patch next week.

 

About the patch.

 

First of all, special thanks to Anatoly for the right pointing to 1435 patch.

 

To recall, 1435 patch has improved event notification mechanism for cq, qp and
srq objects.

I found one problem in the patch, which repeats itself for all three objects and
for both drivers: new event handlers get the old (and wrong) context values.

The new (and right) context values are nor used. As a result, IBAL callbacks are
called with wrong handle parameter, which ends up with crash.

 

 

Index: hw/mlx4/kernel/bus/inc/ib_verbs.h
===================================================================
--- hw/mlx4/kernel/bus/inc/ib_verbs.h (revision 1452)
+++ hw/mlx4/kernel/bus/inc/ib_verbs.h (working copy)
@@ -742,7 +742,6 @@
  void *             cq_context;
  int                cqe;
  atomic_t           usecnt; /* count number of work queues */
- struct ib_cq_ex    x;
 };
 
 struct ib_srq {
@@ -752,7 +751,6 @@
  void        (*event_handler)(ib_event_rec_t *);
  void         *srq_context;
  atomic_t  usecnt;
- struct ib_srq_ex        x;
 };
 
 struct ib_qp {
@@ -766,7 +764,6 @@
  void         *qp_context;
  u32   qp_num;
  enum ib_qp_type  qp_type;
- struct ib_qp_ex         x;
 };
 
 struct ib_mr {
Index: hw/mlx4/kernel/bus/inc/ib_verbs_ex.h
===================================================================
--- hw/mlx4/kernel/bus/inc/ib_verbs_ex.h (revision 1452)
+++ hw/mlx4/kernel/bus/inc/ib_verbs_ex.h (working copy)
@@ -73,24 +73,6 @@
  int     fw_if_open;
 };
 
-/* extension for ib_cq */
-struct ib_cq_ex 
-{
- void *    ctx;  /* IBAL CQ context */
-};
-
-/* extension for ib_qp */
-struct ib_qp_ex 
-{
- void *    ctx;  /* IBAL QP context */
-};
-
-/* extension for ib_srq */
-struct ib_srq_ex 
-{
- void *    ctx;  /* IBAL SRQ context */
-};
-
 /* extension for ib_event */
 struct ib_event_ex 
 {
Index: hw/mlx4/kernel/hca/cq.c
===================================================================
--- hw/mlx4/kernel/hca/cq.c (revision 1452)
+++ hw/mlx4/kernel/hca/cq.c (working copy)
@@ -89,7 +89,7 @@
  // allocate cq 
  p_ib_cq = ibv_create_cq(p_ibdev, 
   cq_comp_handler, event_handler,
-  p_hca, *p_size, p_uctx, p_umv_buf );
+  (void*)cq_context, *p_size, p_uctx, p_umv_buf );
  if (IS_ERR(p_ib_cq)) {
   err = PTR_ERR(p_ib_cq);
   HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_CQ, ("ibv_create_cq failed (%d)\n",
err));
@@ -97,9 +97,6 @@
   goto err_create_cq;
  }
 
- // fill the object
- p_ib_cq->x.ctx = (void*)cq_context;
- 
  // return the result
  *p_size = p_ib_cq->cqe;
 
Index: hw/mlx4/kernel/hca/qp.c
===================================================================
--- hw/mlx4/kernel/hca/qp.c (revision 1452)
+++ hw/mlx4/kernel/hca/qp.c (working copy)
@@ -100,8 +100,6 @@
  struct ib_qp_init_attr qp_init_attr;
  struct ib_ucontext *p_uctx = NULL;
  struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd;
- struct ib_device *p_ib_dev = p_ib_pd->device;
- mlnx_hca_t *p_hca = ibdev2hca(p_ib_dev);
  struct ibv_create_qp *p_req = NULL;
  
  HCA_ENTER(HCA_DBG_QP);
@@ -121,7 +119,7 @@
  // prepare the parameters
  RtlZeroMemory(&qp_init_attr, sizeof(qp_init_attr));
  qp_init_attr.event_handler = event_handler;
- qp_init_attr.qp_context = p_hca;
+ qp_init_attr.qp_context = (void*)qp_uctx;
  qp_init_attr.send_cq = (struct ib_cq *)p_create_attr->h_sq_cq;
  qp_init_attr.recv_cq = (struct ib_cq *)p_create_attr->h_rq_cq;
  qp_init_attr.srq = (struct ib_srq *)p_create_attr->h_srq;
@@ -153,9 +151,6 @@
   goto err_create_qp;
  }
 
- // fill the object
- p_ib_qp->x.ctx = (void*)qp_uctx;
-
  // Query QP to obtain requested attributes
  if (p_qp_attr) {
   status = mlnx_query_qp((ib_qp_handle_t)p_ib_qp, p_qp_attr, p_umv_buf);
Index: hw/mlx4/kernel/hca/srq.c
===================================================================
--- hw/mlx4/kernel/hca/srq.c (revision 1452)
+++ hw/mlx4/kernel/hca/srq.c (working copy)
@@ -55,8 +55,6 @@
  struct ib_srq_init_attr srq_init_attr;
  struct ib_ucontext *p_uctx = NULL;
  struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd;
- struct ib_device *p_ib_dev = p_ib_pd->device;
- mlnx_hca_t *p_hca = ibdev2hca(p_ib_dev);
 
  HCA_ENTER(HCA_DBG_SRQ);
 
@@ -75,7 +73,7 @@
  // prepare the parameters
  RtlZeroMemory(&srq_init_attr, sizeof(srq_init_attr));
  srq_init_attr.event_handler = event_handler;
- srq_init_attr.srq_context = p_hca;
+ srq_init_attr.srq_context = (void*)srq_context;
  srq_init_attr.attr.max_wr = p_srq_attr->max_wr;
  srq_init_attr.attr.max_sge = p_srq_attr->max_sge;
  srq_init_attr.attr.srq_limit = p_srq_attr->srq_limit;
@@ -88,7 +86,6 @@
   status = errno_to_iberr(err);
   goto err_create_srq;
  }
- p_ib_srq->x.ctx = (void*)srq_context;
 
  // return the result
  if (ph_srq) *ph_srq = (ib_srq_handle_t)p_ib_srq;
Index: hw/mthca/kernel/hca_verbs.c
===================================================================
--- hw/mthca/kernel/hca_verbs.c (revision 1452)
+++ hw/mthca/kernel/hca_verbs.c (working copy)
@@ -870,12 +870,10 @@
  int err;
  ib_api_status_t  status;
  struct ib_srq *ib_srq_p;
- struct mthca_srq *srq_p;
  struct ib_srq_init_attr srq_init_attr;
  struct ib_ucontext *p_context = NULL;
  struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd;
  struct ib_device *ib_dev = ib_pd_p->device;
- mlnx_hob_t  *hob_p = HOB_FROM_IBDEV(ib_dev);
 
  HCA_ENTER(HCA_DBG_SRQ);
 
@@ -894,7 +892,7 @@
  // prepare the parameters
  RtlZeroMemory(&srq_init_attr, sizeof(srq_init_attr));
  srq_init_attr.event_handler = event_handler;
- srq_init_attr.srq_context = hob_p;
+ srq_init_attr.srq_context = (void*)srq_context;
  srq_init_attr.attr = *p_srq_attr;
 
  // allocate srq 
@@ -906,12 +904,8 @@
   goto err_create_srq;
  }
 
- // fill the object
- srq_p = (struct mthca_srq *)ib_srq_p;
- srq_p->srq_context = (void*)srq_context;
- 
  // return the result
- if (ph_srq) *ph_srq = (ib_srq_handle_t)srq_p;
+ if (ph_srq) *ph_srq = (ib_srq_handle_t)ib_srq_p;
 
  status = IB_SUCCESS;
  
@@ -1044,7 +1038,6 @@
  struct ib_ucontext *p_context = NULL;
  struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd;
  struct ib_device *ib_dev = ib_pd_p->device;
- mlnx_hob_t  *hob_p = HOB_FROM_IBDEV(ib_dev);
  
  HCA_ENTER(HCA_DBG_QP);
 
@@ -1063,7 +1056,7 @@
  RtlZeroMemory(&qp_init_attr, sizeof(qp_init_attr));
  qp_init_attr.qp_type = p_create_attr->qp_type;
  qp_init_attr.event_handler = event_handler;
- qp_init_attr.qp_context = hob_p;
+ qp_init_attr.qp_context = (void*)qp_context;
  qp_init_attr.recv_cq = (struct ib_cq *)p_create_attr->h_rq_cq;
  qp_init_attr.send_cq = (struct ib_cq *)p_create_attr->h_sq_cq;
  qp_init_attr.srq = (struct ib_srq *)p_create_attr->h_srq;
@@ -1087,7 +1080,6 @@
 
  // fill the object
  qp_p = (struct mthca_qp *)ib_qp_p;
- qp_p->qp_context = (void*)qp_context;
  qp_p->qp_init_attr = qp_init_attr;
 
  // Query QP to obtain requested attributes
@@ -1401,7 +1393,6 @@
  int err;
  ib_api_status_t  status;
  struct ib_cq *ib_cq_p;
- struct mthca_cq *cq_p;
  mlnx_hob_t   *hob_p;
  struct ib_device *ib_dev;
  struct ib_ucontext *p_context;
@@ -1437,7 +1428,7 @@
  // allocate cq 
  ib_cq_p = ibv_create_cq(ib_dev, 
   cq_comp_handler, event_handler,
-  hob_p, *p_size, p_context, p_umv_buf );
+  (void*)cq_context, *p_size, p_context, p_umv_buf );
  if (IS_ERR(ib_cq_p)) {
   err = PTR_ERR(ib_cq_p);
   HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_CQ, ("ibv_create_cq failed (%d)\n",
err));
@@ -1445,15 +1436,11 @@
   goto err_create_cq;
  }
 
- // fill the object
- cq_p = (struct mthca_cq *)ib_cq_p;
- cq_p->cq_context = (void*)cq_context;
- 
  // return the result
 // *p_size = *p_size; // return the same value
  *p_size = ib_cq_p->cqe;
 
- if (ph_cq) *ph_cq = (ib_cq_handle_t)cq_p;
+ if (ph_cq) *ph_cq = (ib_cq_handle_t)ib_cq_p;
 
  status = IB_SUCCESS;
  
Index: hw/mthca/kernel/mthca_cq.c
===================================================================
--- hw/mthca/kernel/mthca_cq.c (revision 1452)
+++ hw/mthca/kernel/mthca_cq.c (working copy)
@@ -237,7 +237,7 @@
    ++cq->arm_sn;
  }
 
- cq->ibcq.comp_handler(cq->cq_context);
+ cq->ibcq.comp_handler(cq->ibcq.cq_context);
 }
 
 void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
Index: hw/mthca/kernel/mthca_provider.h
===================================================================
--- hw/mthca/kernel/mthca_provider.h (revision 1452)
+++ hw/mthca/kernel/mthca_provider.h (working copy)
@@ -189,7 +189,6 @@
 
 struct mthca_cq {
  struct ib_cq           ibcq;
- void      *cq_context; // leo: for IBAL shim
  spinlock_t             lock;
  atomic_t               refcount;
  int                    cqn;
@@ -234,7 +233,6 @@
 
  wait_queue_head_t wait;
  KMUTEX   mutex;
- void    *srq_context; 
 };
 
 struct mthca_wq {
@@ -254,7 +252,6 @@
 
 struct mthca_qp {
  struct ib_qp           ibqp;
- void      *qp_context; // leo: for IBAL shim
  //TODO: added just because absense of ibv_query_qp
  // thereafter it may be worth to be replaced by struct ib_qp_attr qp_attr;
  struct ib_qp_init_attr qp_init_attr; // leo: for query_qp

 

 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080731/22f038b6/attachment.html>


More information about the ofw mailing list