[openib-general] [PATCH] Reduce packet loss in receive path, OFED 1.1

Bryan O'Sullivan bos at pathscale.com
Wed Sep 6 15:54:12 PDT 2006


Hi, Tziporet -

This is another patch for RC4, which reduces the likelihood of packet
loss when the receiver is being saturated with packets.  Please apply.

Thanks,

	<b
-------------- next part --------------
IB/ipath - use memcpy_cachebypass to reduce packet loss

In cases where a large incoming RDMA is being received, we have to
copy data inside the interrupt handler before we can ACK each packet.
The source is DMAed to by the hardware, which means that the CPU won't
have it cached.  We only read the source this one time; using normal load
instructions pollutes the dcache with useless data, reducing performance
to the point where we can lose a significant number of packets.

We use memcpy_cachebypass to try to not fill the dcache with useless data.
Avoiding the cache refill penalty lets us keep up better with the sender,
resulting in many fewer dropped packets.

Signed-off-by: Ralph Campbell <ralph.campbell at qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan at qlogic.com>

diff -r d8eed27eaaa2 drivers/infiniband/hw/ipath/Makefile
--- a/drivers/infiniband/hw/ipath/Makefile	Wed Sep 06 13:26:27 2006 -0700
+++ b/drivers/infiniband/hw/ipath/Makefile	Wed Sep 06 15:48:34 2006 -0700
@@ -31,4 +31,5 @@ ib_ipath-y := \
 	ipath_verbs.o
 
 ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_X86_64) += memcpy_cachebypass_x86_64.o
 ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff -r d8eed27eaaa2 drivers/infiniband/hw/ipath/ipath_verbs.c
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c	Wed Sep 06 13:26:27 2006 -0700
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c	Wed Sep 06 15:48:45 2006 -0700
@@ -40,6 +40,12 @@
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
+#ifdef __x86_64__
+void *memcpy_cachebypass(void *, const void *, __kernel_size_t);
+#else
+#define memcpy_cachebypass(a,b,c) memcpy((a),(b),(c))
+#endif
+
 static unsigned int ib_ipath_qp_table_size = 251;
 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
@@ -167,7 +173,7 @@ void ipath_copy_sge(struct ipath_sge_sta
 		BUG_ON(len == 0);
 		if (len > length)
 			len = length;
-		memcpy(sge->vaddr, data, len);
+		memcpy_cachebypass(sge->vaddr, data, len);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
diff -r d8eed27eaaa2 drivers/infiniband/hw/ipath/memcpy_cachebypass_x86_64.S
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/infiniband/hw/ipath/memcpy_cachebypass_x86_64.S	Wed Sep 06 15:48:34 2006 -0700
@@ -0,0 +1,115 @@
+	.text
+	.p2align 4,,15
+	/* rdi  destination, rsi source, rdx count */
+	.globl	memcpy_cachebypass
+	.type	memcpy_cachebypass, @function
+memcpy_cachebypass:
+	movq	%rdi, %rax
+.L5:
+	cmpq	$15, %rdx
+	ja	.L34
+.L3:
+	cmpl	$8, %edx	/* rdx is 0..15 */
+	jbe	.L9
+.L6:
+	testb	$8, %dxl	/* rdx is 3,5,6,7,9..15 */
+	je	.L13
+	movq	(%rsi), %rcx
+	addq	$8, %rsi
+	movq	%rcx, (%rdi)
+	addq	$8, %rdi
+.L13:
+	testb	$4, %dxl
+	je	.L15
+	movl	(%rsi), %ecx
+	addq	$4, %rsi
+	movl	%ecx, (%rdi)
+	addq	$4, %rdi
+.L15:
+	testb	$2, %dxl
+	je	.L17
+	movzwl	(%rsi), %ecx
+	addq	$2, %rsi
+	movw	%cx, (%rdi)
+	addq	$2, %rdi
+.L17:
+	testb	$1, %dxl
+	je	.L33
+.L1:
+	movzbl	(%rsi), %ecx
+	movb	%cl, (%rdi)
+.L33:
+	ret
+.L34:
+	cmpq	$63, %rdx	/* rdx is > 15 */
+	ja	.L64
+	movl	$16, %ecx	/* rdx is 16..63 */
+.L25:
+	movq	8(%rsi), %r8
+	movq	(%rsi), %r9
+	addq	%rcx, %rsi
+	movq	%r8, 8(%rdi)
+	movq	%r9, (%rdi)
+	addq	%rcx, %rdi
+	subq	%rcx, %rdx
+	cmpl	%edx, %ecx	/* is rdx >= 16? */
+	jbe	.L25
+	jmp	.L3		/* rdx is 0..15 */
+	.p2align 4,,7
+.L64:
+	movl	$64, %ecx
+.L42:
+	prefetchnta	128(%rsi)
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	24(%rsi), %r11
+	subq	%rcx, %rdx
+	movq	%r8, (%rdi)
+	movq	32(%rsi), %r8
+	movq	%r9, 8(%rdi)
+	movq	40(%rsi), %r9
+	movq	%r10, 16(%rdi)
+	movq	48(%rsi), %r10
+	movq	%r11, 24(%rdi)
+	movq	56(%rsi), %r11
+	addq	%rcx, %rsi
+	movq	%r8, 32(%rdi)
+	movq	%r9, 40(%rdi)
+	movq	%r10, 48(%rdi)
+	movq	%r11, 56(%rdi)
+	addq	%rcx, %rdi
+	cmpq	%rdx, %rcx	/* is rdx >= 64? */
+	jbe	.L42
+	sfence
+	orl	%edx, %edx
+	je	.L33
+	jmp	.L5
+.L9:
+	jmp	*.L12(,%rdx,8)	/* rdx is 0..8 */
+	.section	.rodata
+	.align 8
+	.align 4
+.L12:
+	.quad	.L33
+	.quad	.L1
+	.quad	.L2
+	.quad	.L6
+	.quad	.L4
+	.quad	.L6
+	.quad	.L6
+	.quad	.L6
+	.quad	.L8
+	.text
+.L2:
+	movzwl	(%rsi), %ecx
+	movw	%cx, (%rdi)
+	ret
+.L4:
+	movl	(%rsi), %ecx
+	movl	%ecx, (%rdi)
+	ret
+.L8:
+	movq	(%rsi), %rcx
+	movq	%rcx, (%rdi)
+	ret


More information about the general mailing list