[ofa-general] [PATCH 2 of 2] IB/mlx4: Handle new FW requirement for send request prefetching, for WQE sg lists
Jack Morgenstein
jackm at dev.mellanox.co.il
Tue Sep 4 00:47:31 PDT 2007
This is an addendum to Roland's commit 0e6e74162164d908edf7889ac66dca09e7505745
(June 18), extending the prefetch headroom marking (stamping) to WQE s/g segments.

We write s/g segments into the WQE in reverse order, to guarantee that the
first dword of each cacheline containing s/g segments is written last
(overwriting the headroom invalidation pattern). The entire cacheline thus
contains valid data by the time the invalidation pattern is overwritten.
Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>
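
To illustrate the ordering this relies on, here is a minimal userspace sketch
(illustration only: the types, the stamp placement and the barrier() stand-in
are made up; the driver does the same thing with mlx4_wqe_data_seg and wmb()).
The first dword of a headroom cacheline holds the 0xffffffff stamp; segments
are filled from the last one backwards, and each segment's byte_count is
stored only after a barrier, so the stamped dword is the final store into its
cacheline:

/*
 * Illustration only -- NOT the mlx4 driver code.  The types, the stamp
 * placement and the barrier() stand-in are made up for this sketch.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define STAMP 0xffffffffu		/* headroom invalidation pattern */

struct sketch_data_seg {		/* 16 bytes, like a HW data segment */
	uint32_t byte_count;		/* first dword: may overlap the stamp */
	uint32_t lkey;
	uint64_t addr;
};

#define barrier() __asm__ __volatile__("" ::: "memory")

static void sketch_set_data_seg(struct sketch_data_seg *dseg,
				uint64_t addr, uint32_t lkey, uint32_t len)
{
	dseg->lkey = lkey;
	dseg->addr = addr;
	barrier();			/* data visible before byte_count */
	dseg->byte_count = len;		/* overwrites the stamp if first in line */
}

int main(void)
{
	struct sketch_data_seg wqe[4];	/* one 64-byte "cacheline" of headroom */
	int i;

	memset(wqe, 0, sizeof(wqe));
	((uint32_t *) wqe)[0] = STAMP;	/* stamp the cacheline's first dword */

	/* Fill segments in reverse order so wqe[0].byte_count -- the stamped
	 * dword -- is the very last store into the cacheline. */
	for (i = 3; i >= 0; --i)
		sketch_set_data_seg(&wqe[i], 0x1000 + 16 * i, 0x42, 16);

	printf("first dword is now 0x%x\n", (unsigned int) ((uint32_t *) wqe)[0]);
	return 0;
}

Built with gcc, the sketch prints the last segment's length in place of the
stamp, i.e. the stamped dword is only consumed by the final store.
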
Index: ofed_kernel/drivers/infiniband/hw/mlx4/qp.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/qp.c 2007-07-30 16:35:01.000000000 +0300
+++ ofed_kernel/drivers/infiniband/hw/mlx4/qp.c 2007-07-30 17:05:47.000000000 +0300
@@ -1215,9 +1215,18 @@ static void set_datagram_seg(struct mlx4
 static void set_data_seg(struct mlx4_wqe_data_seg *dseg,
 			 struct ib_sge *sg)
 {
-	dseg->byte_count = cpu_to_be32(sg->length);
 	dseg->lkey = cpu_to_be32(sg->lkey);
 	dseg->addr = cpu_to_be64(sg->addr);
+
+	/* Need a barrier before writing the byte_count field
+	 * to make sure that all the data is visible before the
+	 * byte_count field is set. Otherwise, if the segment
+	 * begins a new cacheline, the HCA prefetcher could
+	 * grab the 64-byte chunk and get a valid (!= 0xffffffff)
+	 * byte count but stale data, and end up sending the wrong
+	 * data. */
+	wmb();
+	dseg->byte_count = cpu_to_be32(sg->length);
 }
 
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
@@ -1226,6 +1235,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	void *wqe;
 	struct mlx4_wqe_ctrl_seg *ctrl;
+	struct mlx4_wqe_data_seg *seg;
 	unsigned long flags;
 	int nreq;
 	int err = 0;
@@ -1325,19 +1335,22 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
 			break;
 		}
 
-		for (i = 0; i < wr->num_sge; ++i) {
-			set_data_seg(wqe, wr->sg_list + i);
-
-			wqe += sizeof (struct mlx4_wqe_data_seg);
+		seg = (struct mlx4_wqe_data_seg *) wqe;
+		/* Add one more inline data segment for ICRC for MLX sends.
+		 * Write this inline and all s/g segments in reverse order,
+		 * so as to overwrite cacheline stamp last within each
+		 * cacheline. */
+		if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
+			void *t = wqe + (wr->num_sge) * sizeof(struct mlx4_wqe_data_seg);
+			((u32 *) t)[1] = 0;
+			wmb();
+			((struct mlx4_wqe_inline_seg *) t)->byte_count =
+				cpu_to_be32((1 << 31) | 4);
 			size += sizeof (struct mlx4_wqe_data_seg) / 16;
 		}
 
-		/* Add one more inline data segment for ICRC for MLX sends */
-		if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
-			((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
-				cpu_to_be32((1 << 31) | 4);
-			((u32 *) wqe)[1] = 0;
-			wqe += sizeof (struct mlx4_wqe_data_seg);
+		for (i = wr->num_sge - 1; i >= 0; --i) {
+			set_data_seg(seg + i, wr->sg_list + i);
 			size += sizeof (struct mlx4_wqe_data_seg) / 16;
 		}
 
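
For the MLX (SMI/GSI) case above, the extra inline ICRC segment follows the
same discipline: the dword after byte_count is cleared first, then wmb(), and
only then is byte_count written with bit 31 (the inline flag) set, so the
segment cannot look valid while the rest of its cacheline is still stale. A
standalone sketch of just that ordering, again with made-up names rather than
the driver's:

/* Illustration only -- not the driver's mlx4_wqe_inline_seg handling. */
#include <stdint.h>
#include <stdio.h>

#define barrier() __asm__ __volatile__("" ::: "memory")

struct sketch_inline_seg {
	uint32_t byte_count;	/* bit 31 = inline flag, low bits = length */
	uint32_t data[3];	/* room for the 4-byte ICRC placeholder */
};

static void sketch_set_icrc_seg(struct sketch_inline_seg *iseg)
{
	iseg->data[0] = 0;			/* dword after byte_count first */
	barrier();				/* ...then let it become valid */
	iseg->byte_count = (1u << 31) | 4;	/* inline segment, 4 bytes */
}

int main(void)
{
	struct sketch_inline_seg iseg = { 0xffffffff, { 0 } };	/* stamped */

	sketch_set_icrc_seg(&iseg);
	printf("byte_count = 0x%08x\n", (unsigned int) iseg.byte_count);
	return 0;
}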