[openib-general] [PATCH][12/12] InfiniBand/mthca: remove x86 SSE pessimization
Roland Dreier
roland at topspin.com
Sun Jan 23 22:14:24 PST 2005
Get rid of the x86 SSE code for atomic 64-bit writes to doorbell
registers. Saving/setting CR0 plus a clts instruction are too
expensive for it to ever be a win, and the config option was just
confusing.
Signed-off-by: Roland Dreier <roland at topspin.com>
--- linux-bk.orig/drivers/infiniband/hw/mthca/Kconfig 2005-01-23 08:30:27.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/Kconfig 2005-01-23 21:00:44.744520064 -0800
@@ -14,13 +14,3 @@
This option causes the mthca driver produce a bunch of debug
messages. Select this is you are developing the driver or
trying to diagnose a problem.
-
-config INFINIBAND_MTHCA_SSE_DOORBELL
- bool "SSE doorbell code"
- depends on INFINIBAND_MTHCA && X86 && !X86_64
- default n
- ---help---
- This option will have the mthca driver use SSE instructions
- to ring hardware doorbell registers. This may improve
- performance for some workloads, but the driver will not run
- on processors without SSE instructions.
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 20:58:55.771086544 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 21:00:44.745519912 -0800
@@ -40,10 +40,6 @@
#include <linux/pci.h>
#include <linux/interrupt.h>
-#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
-#include <asm/cpufeature.h>
-#endif
-
#include "mthca_dev.h"
#include "mthca_config_reg.h"
#include "mthca_cmd.h"
@@ -1117,22 +1113,6 @@
{
int ret;
- /*
- * TODO: measure whether dynamically choosing doorbell code at
- * runtime affects our performance. Is there a "magic" way to
- * choose without having to follow a function pointer every
- * time we ring a doorbell?
- */
-#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
- if (!cpu_has_xmm) {
- printk(KERN_ERR PFX "mthca was compiled with SSE doorbell code, but\n");
- printk(KERN_ERR PFX "the current CPU does not support SSE.\n");
- printk(KERN_ERR PFX "Turn off CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL "
- "and recompile.\n");
- return -ENODEV;
- }
-#endif
-
ret = pci_register_driver(&mthca_driver);
return ret < 0 ? ret : 0;
}
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_doorbell.h 2005-01-23 08:30:38.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_doorbell.h 2005-01-23 21:00:44.746519760 -0800
@@ -32,9 +32,7 @@
* $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
*/
-#include <linux/config.h>
#include <linux/types.h>
-#include <linux/preempt.h>
#define MTHCA_RD_DOORBELL 0x00
#define MTHCA_SEND_DOORBELL 0x10
@@ -59,51 +57,13 @@
__raw_writeq(*(u64 *) val, dest);
}
-#elif defined(CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL)
-/* Use SSE to write 64 bits atomically without a lock. */
-
-#define MTHCA_DECLARE_DOORBELL_LOCK(name)
-#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
-#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
-
-static inline unsigned long mthca_get_fpu(void)
-{
- unsigned long cr0;
-
- preempt_disable();
- asm volatile("mov %%cr0,%0; clts" : "=r" (cr0));
- return cr0;
-}
-
-static inline void mthca_put_fpu(unsigned long cr0)
-{
- asm volatile("mov %0,%%cr0" : : "r" (cr0));
- preempt_enable();
-}
-
-static inline void mthca_write64(u32 val[2], void __iomem *dest,
- spinlock_t *doorbell_lock)
-{
- /* i386 stack is aligned to 8 bytes, so this should be OK: */
- u8 xmmsave[8] __attribute__((aligned(8)));
- unsigned long cr0;
-
- cr0 = mthca_get_fpu();
-
- asm volatile (
- "movlps %%xmm0,(%0); \n\t"
- "movlps (%1),%%xmm0; \n\t"
- "movlps %%xmm0,(%2); \n\t"
- "movlps (%0),%%xmm0; \n\t"
- :
- : "r" (xmmsave), "r" (val), "r" (dest)
- : "memory" );
-
- mthca_put_fpu(cr0);
-}
-
#else
-/* Just fall back to a spinlock to protect the doorbell */
+
+/*
+ * Just fall back to a spinlock to protect the doorbell if
+ * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
+ * MMIO writes.
+ */
#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
More information about the general
mailing list