[openib-general] [PATCH] use mmiowb after doorbell ring

akepner at sgi.com akepner at sgi.com
Sun Oct 15 02:02:44 PDT 2006


We discovered a problem when running IPoIB applications on
multiple CPUs on an Altix system. Many messages such as:

ib_mthca 0002:01:00.0: SQ 000014 full (19941644 head, 19941707 tail, 64 max, 0 nreq)

appear in syslog, and the driver hangs.

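Apparently this is because doorbell writes issued from different
CPUs can reach the HCA out of order: on Altix, holding a spinlock
does not by itself order MMIO writes made by different CPUs. The
following patch adds mmiowb() calls after doorbell rings, before
the protecting spinlock is released, to ensure the doorbell
register updates are ordered.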

Signed-off-by: <akepner at sgi.com>

---
diff -rpu openib-1.1.orig/drivers/infiniband/hw/mthca/mthca_cq.c openib-1.1/drivers/infiniband/hw/mthca/mthca_cq.c
--- openib-1.1.orig/drivers/infiniband/hw/mthca/mthca_cq.c	2006-10-15 00:23:07.474893244 -0700
+++ openib-1.1/drivers/infiniband/hw/mthca/mthca_cq.c	2006-10-15 00:25:03.601978852 -0700
@@ -41,6 +41,8 @@

  #include <rdma/ib_pack.h>

+#include <asm/io.h>
+
  #include "mthca_dev.h"
  #include "mthca_cmd.h"
  #include "mthca_memfree.h"
@@ -314,6 +316,9 @@ void mthca_cq_clean(struct mthca_dev *de
  		wmb();
  		cq->cons_index += nfreed;
  		update_cons_index(dev, cq, nfreed);
+		/* use mmiowb to ensure update is ordered properly 
+		 * prior to releasing the spinlock */
+		mmiowb();
  	}

  	spin_unlock_irq(&cq->lock);
@@ -711,6 +716,11 @@ repoll:
  		}
  	}

+	if (freed) {
+		/* we rang the MTHCA_CQ_DOORBELL so use mmiowb 
+		 * to make sure it is ordered properly */
+		mmiowb();
+	}
  	spin_unlock_irqrestore(&cq->lock, flags);

  	return err == 0 || err == -EAGAIN ? npolled : err;
diff -rpu openib-1.1.orig/drivers/infiniband/hw/mthca/mthca_qp.c openib-1.1/drivers/infiniband/hw/mthca/mthca_qp.c
--- openib-1.1.orig/drivers/infiniband/hw/mthca/mthca_qp.c	2006-10-15 00:23:20.126932247 -0700
+++ openib-1.1/drivers/infiniband/hw/mthca/mthca_qp.c	2006-10-15 00:25:03.613697320 -0700
@@ -43,6 +43,8 @@
  #include <rdma/ib_cache.h>
  #include <rdma/ib_pack.h>

+#include <asm/io.h>
+
  #include "mthca_dev.h"
  #include "mthca_cmd.h"
  #include "mthca_memfree.h"
@@ -1730,6 +1732,9 @@ out:
  		mthca_write64(doorbell,
  			      dev->kar + MTHCA_SEND_DOORBELL,
  			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+		/* use mmiowb to ensure write to doorbell is ordered 
+		 * before releasing spinlock */
+		mmiowb();
  	}

  	qp->sq.next_ind = ind;
@@ -1849,6 +1854,9 @@ out:
  	qp->rq.next_ind = ind;
  	qp->rq.head    += nreq;

+	/* use mmiowb to ensure writes to doorbell are ordered 
+	 * before releasing spinlock */
+	mmiowb();
  	spin_unlock_irqrestore(&qp->rq.lock, flags);
  	return err;
  }
@@ -2110,6 +2118,9 @@ out:
  			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
  	}

+	/* use mmiowb to ensure writes to doorbell are ordered 
+	 * before releasing spinlock */
+	mmiowb();
  	spin_unlock_irqrestore(&qp->sq.lock, flags);
  	return err;
  }
diff -rpu openib-1.1.orig/drivers/infiniband/hw/mthca/mthca_srq.c openib-1.1/drivers/infiniband/hw/mthca/mthca_srq.c
--- openib-1.1.orig/drivers/infiniband/hw/mthca/mthca_srq.c	2006-10-15 00:23:25.428562360 -0700
+++ openib-1.1/drivers/infiniband/hw/mthca/mthca_srq.c	2006-10-15 00:25:03.626392326 -0700
@@ -35,6 +35,8 @@
  #include <linux/slab.h>
  #include <linux/string.h>

+#include <asm/io.h>
+
  #include "mthca_dev.h"
  #include "mthca_cmd.h"
  #include "mthca_memfree.h"
@@ -593,6 +595,9 @@ int mthca_tavor_post_srq_recv(struct ib_
  			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
  	}

+	/* use mmiowb to ensure writes to doorbell are ordered
+	 * before releasing spinlock */
+	mmiowb();
  	spin_unlock_irqrestore(&srq->lock, flags);
  	return err;
  }




-- 
Arthur






More information about the general mailing list