[openib-general] [PATCH] (resend) mthca_eq improvements

Michael S. Tsirkin mst at mellanox.co.il
Mon Jan 17 06:31:14 PST 2005


Sorry, forgot the patch.

========

Hello, Roland, all!
Here are some mthca eq improvements, rolled into one patch.
These all touch the same code and so are related,
but if it's too big to be readable, let me know and I'll try to split
it up.

With these changes I am now getting at least 10% more bandwidth
on a non-MSI system than before.

Issues addressed are:

1. general interrupt handling speedup
   I noticed that several operations were performed multiple times:
   for example, get_eqe was called multiple times; as another example,
   the consumer index was truncated to eq size upon each eqe found.

   These cleanups get me some 5% more bandwidth in ip over ib; I expect
   them to carry forward to MSI systems too.

   This last change (truncating consumer index only once) is also
   needed for future memfree support, since memfree has full 32 bit
   indices.

2. non-MSI interrupt handling simplification
   This is the 3rd chunk from the end.

   I also noticed that the interrupt handler re-read the ecr after
   handling all eqs, in case more eqes were written. However, consider
   that the interrupt is *not* cleared a second time if this happens.

   As a result, after we get out of the handler, we will get an extra
   interrupt with all bits in ecr cleared. And since this is a shared
   interrupt, we'll report it as not handled, further confusing the
   system.

   And re-checking ecr is not actually needed, since if some bits are
   set, hardware will assert the interrupt and we will get back to the
   handler eventually.

   Changing this gets me another 5% bandwidth, but only on
   a non-MSI system, naturally.

3. Added memfree support in the eq initialisation command.
   This is in the last 2 chunks.

Please comment.
MST

Interrupt handler speedup and start of memfree eq support.

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

Index: hw/mthca/mthca_provider.h
===================================================================
--- hw/mthca/mthca_provider.h	(revision 1540)
+++ hw/mthca/mthca_provider.h	(working copy)
@@ -70,7 +70,7 @@ struct mthca_eq {
 	u16                    msi_x_entry;
 	int                    have_irq;
 	int                    nent;
-	int                    cons_index;
+	unsigned               cons_index;
 	struct mthca_buf_list *page_list;
 	struct mthca_mr        mr;
 };
Index: hw/mthca/mthca_eq.c
===================================================================
--- hw/mthca/mthca_eq.c	(revision 1540)
+++ hw/mthca/mthca_eq.c	(working copy)
@@ -164,12 +164,12 @@ static inline u64 async_mask(struct mthc
 		MTHCA_ASYNC_EVENT_MASK;
 }
 
-static inline void set_eq_ci(struct mthca_dev *dev, int eqn, int ci)
+static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
 {
 	u32 doorbell[2];
 
-	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eqn);
-	doorbell[1] = cpu_to_be32(ci);
+	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
+	doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
 
 	mthca_write64(doorbell,
 		      dev->kar + MTHCA_EQ_DOORBELL,
@@ -200,21 +200,22 @@ static inline void disarm_cq(struct mthc
 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 }
 
-static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, int entry)
+static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
 {
-	return eq->page_list[entry * MTHCA_EQ_ENTRY_SIZE / PAGE_SIZE].buf
-		+ (entry * MTHCA_EQ_ENTRY_SIZE) % PAGE_SIZE;
+	unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
+	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
 }
 
-static inline int next_eqe_sw(struct mthca_eq *eq)
+static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq)
 {
-	return !(MTHCA_EQ_ENTRY_OWNER_HW &
-		 get_eqe(eq, eq->cons_index)->owner);
+	struct mthca_eqe* eqe;
+	eqe = get_eqe(eq, eq->cons_index);
+	return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
 }
 
-static inline void set_eqe_hw(struct mthca_eq *eq, int entry)
+static inline void set_eqe_hw(struct mthca_eqe *eqe)
 {
-	get_eqe(eq, entry)->owner =  MTHCA_EQ_ENTRY_OWNER_HW;
+	eqe->owner =  MTHCA_EQ_ENTRY_OWNER_HW;
 }
 
 static void port_change(struct mthca_dev *dev, int port, int active)
@@ -231,14 +232,14 @@ static void port_change(struct mthca_dev
 	ib_dispatch_event(&record);
 }
 
-static void mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
+static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
 {
 	struct mthca_eqe *eqe;
 	int disarm_cqn;
+	int  eqes_found = 0;
 
-	while (next_eqe_sw(eq)) {
+	while ((eqe = next_eqe_sw(eq))) {
 		int set_ci = 0;
-		eqe = get_eqe(eq, eq->cons_index);
 
 		/*
 		 * Make sure we read EQ entry contents after we've
@@ -328,12 +329,13 @@ static void mthca_eq_int(struct mthca_de
 			break;
 		};
 
-		set_eqe_hw(eq, eq->cons_index);
-		eq->cons_index = (eq->cons_index + 1) & (eq->nent - 1);
+		set_eqe_hw(eqe);
+		++eq->cons_index;
+		eqes_found = 1;
 
 		if (set_ci) {
 			wmb(); /* see comment below */
-			set_eq_ci(dev, eq->eqn, eq->cons_index);
+			set_eq_ci(dev, eq, eq->cons_index);
 			set_ci = 0;
 		}
 	}
@@ -347,9 +349,13 @@ static void mthca_eq_int(struct mthca_de
 	 * possibility of the HCA writing an entry and then
 	 * having set_eqe_hw() overwrite the owner field.
 	 */
-	wmb();
-	set_eq_ci(dev, eq->eqn, eq->cons_index);
+	if (likely(eqes_found)) {
+		wmb();
+		set_eq_ci(dev, eq, eq->cons_index);
+	}
 	eq_req_not(dev, eq->eqn);
+
+	return eqes_found;
 }
 
 static irqreturn_t mthca_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
@@ -440,7 +447,7 @@ static int __devinit mthca_create_eq(str
 	}
 
 	for (i = 0; i < nent; ++i)
-		set_eqe_hw(eq, i);
+		set_eqe_hw(get_eqe(eq, i));
 
 	eq->eqn = mthca_alloc(&dev->eq_table.alloc);
 	if (eq->eqn == -1)
@@ -362,7 +368,7 @@ static irqreturn_t mthca_interrupt(int i
 	if (dev->eq_table.clr_mask)
 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
 
-	while ((ecr = readl(dev->hcr + MTHCA_ECR_OFFSET + 4)) != 0) {
+	if ((ecr = readl(dev->hcr + MTHCA_ECR_OFFSET + 4))) {
 		work = 1;
 
 		writel(ecr, dev->hcr + MTHCA_ECR_CLR_OFFSET + 4);
@@ -399,6 +405,7 @@ static int __devinit mthca_create_eq(str
 	void *mailbox = NULL;
 	struct mthca_eq_context *eq_context;
 	int err = -ENOMEM;
+	int page;
 	int i;
 	u8 status;
 
@@ -463,8 +470,13 @@ static int __devinit mthca_create_eq(str
 						  MTHCA_EQ_STATE_ARMED |
 						  MTHCA_EQ_FLAG_TR);
 	eq_context->start           = cpu_to_be64(0);
-	eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
-						  MTHCA_KAR_PAGE);
+
+	if (dev->hca_type == ARBEL_NATIVE)
+		page = MTHCA_KAR_PAGE;
+	else
+		page = 0;
+
+	eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | page);
 	eq_context->pd              = cpu_to_be32(dev->driver_pd.pd_num);
 	eq_context->intr            = intr;
 	eq_context->lkey            = cpu_to_be32(eq->mr.ibmr.lkey);



More information about the general mailing list