[openib-general] [PATCH] Reduce packet loss in receive path, OFED 1.1
Bryan O'Sullivan
bos at pathscale.com
Wed Sep 6 15:54:12 PDT 2006
Hi, Tziporet -
This is another patch for RC4; it reduces the likelihood of packet
loss when the receiver is saturated with incoming packets. Please apply.
Thanks,
<b
-------------- next part --------------
IB/ipath - use memcpy_cachebypass to reduce packet loss
When a large incoming RDMA transfer is being received, we have to
copy data inside the interrupt handler before we can ACK each packet.
The source buffer is written by hardware DMA, which means the CPU
won't have it cached. We read the source only this one time; using
normal load instructions pollutes the dcache with data we will never
touch again, reducing performance to the point where we can lose a
significant number of packets.
We use memcpy_cachebypass to avoid filling the dcache with this
useless data. Avoiding the cache refill penalty lets us keep up better
with the sender, resulting in far fewer dropped packets.
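(For reference, the idea in C terms looks roughly like the sketch
below. This is illustrative only and not part of the patch;
copy_cachebypass_sketch is a made-up name, and the real routine in
the diff is hand-written assembly. The structure is the same, though:
issue a non-temporal prefetch ahead of the read pointer so the
one-time streaming reads displace as little useful cached data as
possible.)

#include <stddef.h>
#include <string.h>
#include <xmmintrin.h>	/* _mm_prefetch, _MM_HINT_NTA */

/* Sketch of a cache-bypassing copy: prefetch the source two cache
 * lines ahead with the non-temporal hint, then copy 64 bytes per
 * iteration, falling back to a byte loop for the tail. */
static void *copy_cachebypass_sketch(void *dst, const void *src,
				     size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	while (len >= 64) {
		_mm_prefetch((const char *)s + 128, _MM_HINT_NTA);
		memcpy(d, s, 64);	/* compiles to wide loads/stores */
		d += 64;
		s += 64;
		len -= 64;
	}
	while (len--)		/* tail: 0..63 remaining bytes */
		*d++ = *s++;
	return dst;
}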
Signed-off-by: Ralph Campbell <ralph.campbell at qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan at qlogic.com>
diff -r d8eed27eaaa2 drivers/infiniband/hw/ipath/Makefile
--- a/drivers/infiniband/hw/ipath/Makefile Wed Sep 06 13:26:27 2006 -0700
+++ b/drivers/infiniband/hw/ipath/Makefile Wed Sep 06 15:48:34 2006 -0700
@@ -31,4 +31,5 @@ ib_ipath-y := \
ipath_verbs.o
ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_X86_64) += memcpy_cachebypass_x86_64.o
ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff -r d8eed27eaaa2 drivers/infiniband/hw/ipath/ipath_verbs.c
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c Wed Sep 06 13:26:27 2006 -0700
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c Wed Sep 06 15:48:45 2006 -0700
@@ -40,6 +40,12 @@
#include "ipath_verbs.h"
#include "ipath_common.h"
+#ifdef __x86_64__
+void *memcpy_cachebypass(void *, const void *, __kernel_size_t);
+#else
+#define memcpy_cachebypass(a,b,c) memcpy((a),(b),(c))
+#endif
+
static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");
@@ -167,7 +173,7 @@ void ipath_copy_sge(struct ipath_sge_sta
BUG_ON(len == 0);
if (len > length)
len = length;
- memcpy(sge->vaddr, data, len);
+ memcpy_cachebypass(sge->vaddr, data, len);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
diff -r d8eed27eaaa2 drivers/infiniband/hw/ipath/memcpy_cachebypass_x86_64.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/infiniband/hw/ipath/memcpy_cachebypass_x86_64.S Wed Sep 06 15:48:34 2006 -0700
@@ -0,0 +1,115 @@
+ .text
+ .p2align 4,,15
+ /* rdi destination, rsi source, rdx count */
+ .globl memcpy_cachebypass
+ .type memcpy_cachebypass, @function
+memcpy_cachebypass:
+ movq %rdi, %rax
+.L5:
+ cmpq $15, %rdx
+ ja .L34
+.L3:
+ cmpl $8, %edx /* rdx is 0..15 */
+ jbe .L9
+.L6:
+ testb $8, %dl /* rdx is 3,5,6,7,9..15 */
+ je .L13
+ movq (%rsi), %rcx
+ addq $8, %rsi
+ movq %rcx, (%rdi)
+ addq $8, %rdi
+.L13:
+ testb $4, %dl
+ je .L15
+ movl (%rsi), %ecx
+ addq $4, %rsi
+ movl %ecx, (%rdi)
+ addq $4, %rdi
+.L15:
+ testb $2, %dl
+ je .L17
+ movzwl (%rsi), %ecx
+ addq $2, %rsi
+ movw %cx, (%rdi)
+ addq $2, %rdi
+.L17:
+ testb $1, %dl
+ je .L33
+.L1:
+ movzbl (%rsi), %ecx
+ movb %cl, (%rdi)
+.L33:
+ ret
+.L34:
+ cmpq $63, %rdx /* rdx is > 15 */
+ ja .L64
+ movl $16, %ecx /* rdx is 16..63 */
+.L25:
+ movq 8(%rsi), %r8
+ movq (%rsi), %r9
+ addq %rcx, %rsi
+ movq %r8, 8(%rdi)
+ movq %r9, (%rdi)
+ addq %rcx, %rdi
+ subq %rcx, %rdx
+ cmpl %edx, %ecx /* is rdx >= 16? */
+ jbe .L25
+ jmp .L3 /* rdx is 0..15 */
+ .p2align 4,,7
+.L64:
+ movl $64, %ecx
+.L42:
+ prefetchnta 128(%rsi)
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq %rcx, %rdx
+ movq %r8, (%rdi)
+ movq 32(%rsi), %r8
+ movq %r9, 8(%rdi)
+ movq 40(%rsi), %r9
+ movq %r10, 16(%rdi)
+ movq 48(%rsi), %r10
+ movq %r11, 24(%rdi)
+ movq 56(%rsi), %r11
+ addq %rcx, %rsi
+ movq %r8, 32(%rdi)
+ movq %r9, 40(%rdi)
+ movq %r10, 48(%rdi)
+ movq %r11, 56(%rdi)
+ addq %rcx, %rdi
+ cmpq %rdx, %rcx /* is rdx >= 64? */
+ jbe .L42
+ sfence
+ orl %edx, %edx
+ je .L33
+ jmp .L5
+.L9:
+ jmp *.L12(,%rdx,8) /* rdx is 0..8 */
+ .section .rodata
+ .align 8
+ .align 4
+.L12:
+ .quad .L33
+ .quad .L1
+ .quad .L2
+ .quad .L6
+ .quad .L4
+ .quad .L6
+ .quad .L6
+ .quad .L6
+ .quad .L8
+ .text
+.L2:
+ movzwl (%rsi), %ecx
+ movw %cx, (%rdi)
+ ret
+.L4:
+ movl (%rsi), %ecx
+ movl %ecx, (%rdi)
+ ret
+.L8:
+ movq (%rsi), %rcx
+ movq %rcx, (%rdi)
+ ret
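(Not part of the patch: since the .S file has no kernel dependencies,
one way to sanity-check the routine is to assemble it in userspace and
compare it against memcpy across lengths and alignments, e.g. with a
harness like the one below, built with
"cc -no-pie harness.c memcpy_cachebypass_x86_64.S". The -no-pie flag
is needed because the jump table uses absolute addresses. This harness
is illustrative only.)

#include <assert.h>
#include <stdlib.h>
#include <string.h>

void *memcpy_cachebypass(void *, const void *, size_t);

int main(void)
{
	unsigned char src[512], dst[512], ref[512];
	size_t len, off;

	for (len = 0; len < sizeof(src); len++)
		src[len] = (unsigned char)rand();

	/* Exercise all small lengths at several misalignments, and
	 * check that bytes outside the copy window are untouched. */
	for (len = 0; len <= 256; len++) {
		for (off = 0; off < 8; off++) {
			memset(dst, 0xaa, sizeof(dst));
			memset(ref, 0xaa, sizeof(ref));
			memcpy_cachebypass(dst + off, src + off, len);
			memcpy(ref + off, src + off, len);
			assert(memcmp(dst, ref, sizeof(dst)) == 0);
		}
	}
	return 0;
}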