[ofa-general] [PATCH] libmlx4: avoid memcpy in blueflame post_sends
Jack Morgenstein
jackm at dev.mellanox.co.il
Wed Jan 9 02:23:14 PST 2008
Do not use memcpy when copying to the BlueFlame buffer.
memcpy implementations may use move-string-buffer (byte-wise
copy) assembler instructions, which do not guarantee copy order
into the BlueFlame buffer. Use a tight for-loop instead.
BTW, this patch also slightly improves latency.
Signed-off-by: Jack Morgenstein <jackm at mellanox.co.il>
---
diff --git a/src/doorbell.h b/src/doorbell.h
index 3171e76..c89ef0e 100644
--- a/src/doorbell.h
+++ b/src/doorbell.h
@@ -35,6 +35,8 @@
#if SIZEOF_LONG == 8
+typedef uint64_t mlx4_wc_copy_t;
+
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define MLX4_PAIR_TO_64(val) ((uint64_t) val[1] << 32 | val[0])
#elif __BYTE_ORDER == __BIG_ENDIAN
@@ -50,6 +52,8 @@ static inline void mlx4_write64(uint32_t val[2], struct mlx4_context *ctx, int o
#else
+typedef uint32_t mlx4_wc_copy_t;
+
static inline void mlx4_write64(uint32_t val[2], struct mlx4_context *ctx, int offset)
{
pthread_spin_lock(&ctx->uar_lock);
diff --git a/src/qp.c b/src/qp.c
index bced740..8fc8450 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -391,7 +391,23 @@ out:
pthread_spin_lock(&ctx->bf_lock);
- memcpy(ctx->bf_page + ctx->bf_offset, ctrl, align(size * 16, 64));
+ /*
+ * Avoid using memcpy to copy to BlueFlame page, since recent
+ * memcpy implementations use move-string-buffer assembler
+ * instructions, which do not guarantee order of copying.
+ */
+
+ {
+ mlx4_wc_copy_t *target =
+ (mlx4_wc_copy_t *) (ctx->bf_page + ctx->bf_offset);
+ mlx4_wc_copy_t *src = (mlx4_wc_copy_t *) ctrl;
+ int n = align(size * 16, 64) / (sizeof(mlx4_wc_copy_t) * 2);
+ for (; n; --n) {
+ *target++ = *src++;
+ *target++ = *src++;
+ }
+ }
+
wc_wmb();
ctx->bf_offset ^= ctx->bf_buf_size;
More information about the general
mailing list