[ofa-general] [PATCH 3 OF 5 v2] libmlx4: avoid adding unneeded extra CQE when creating a cq
Jack Morgenstein
jackm at dev.mellanox.co.il
Sun Oct 28 00:51:38 PDT 2007
commit c04463eb343a0f038eb7a2a877be90cd3e3e19a3
Author: Jack Morgenstein <jackm at mellanox.co.il>
Date: Thu Oct 25 19:17:42 2007 +0200
Do not add an extra CQE when creating a CQ.
Sanity-check against returned device capabilities,
to avoid breaking ABI.
Set minimum to 2, to avoid rejection by kernel.
Adjust num cqes passed to verbs layer.
Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>
---
Roland,
The previous patch neglected to increase the number of CQEs returned
to the verbs-layer caller by 1. If the mlx4 layer was invoked with a
power of 2 (e.g., a request for 128 CQEs), the returned value was
<power-of-2> - 1 (127 in that case), which does not conform to the
IB spec.
This patch fixes that oversight. In order to preserve the ABI, the
corresponding kernel patch still returns <power-of-2> - 1; however,
the user layer can determine whether the kernel has adjusted the
number of CQEs per CQ by checking whether the device-capability
max_cqes is a power of 2 -- if so, create_cq() can increment the
returned cqe value by 1.
It's possible that this increment can be done unconditionally
(i.e., even if an older kernel driver is installed) -- I've
not yet checked this out.
- Jack
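(Illustration only, not part of the patch: a minimal standalone sketch
of the detection logic described above. next_pow2() is a hypothetical
stand-in for libmlx4's align_queue_size(), which rounds up to the next
power of two.)

/* Sketch only; next_pow2() stands in for align_queue_size(). */
static int next_pow2(int n)
{
	int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

/*
 * Mirrors the check added in mlx4_create_cq() below: if the reported
 * max_cqe is a power of 2, the spare-cqe fix is not in the kernel,
 * so create_cq() bumps the returned cqe count by 1 itself.
 */
static int no_spare_cqe_fix(int max_cqe)
{
	return max_cqe == next_pow2(max_cqe);
}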
diff --git a/src/cq.c b/src/cq.c
index c0d7a8b..aac84da 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -114,10 +114,10 @@ static struct mlx4_cqe *get_cqe(struct mlx4_cq *cq, int entry)
static void *get_sw_cqe(struct mlx4_cq *cq, int n)
{
- struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
+ struct mlx4_cqe *cqe = get_cqe(cq, n & cq->cqe_mask);
return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
- !!(n & (cq->ibv_cq.cqe + 1))) ? NULL : cqe;
+ !!(n & (cq->cqe_mask + 1))) ? NULL : cqe;
}
static struct mlx4_cqe *next_cqe_sw(struct mlx4_cq *cq)
@@ -417,7 +417,7 @@ void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
* from our QP and therefore don't need to be checked.
*/
for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
- if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
+ if (prod_index == cq->cons_index + cq->cqe_mask)
break;
/*
@@ -425,7 +425,7 @@ void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
* that match our QP by copying older entries on top of them.
*/
while ((int) --prod_index - (int) cq->cons_index >= 0) {
- cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
+ cqe = get_cqe(cq, prod_index & cq->cqe_mask);
if (is_xrc_srq &&
((ntohl(cqe->g_mlpath_rqpn) & 0xffffff) == srq->srqn) &&
!(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
@@ -436,7 +436,7 @@ void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
++nfreed;
} else if (nfreed) {
- dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe);
+ dest = get_cqe(cq, (prod_index + nfreed) & cq->cqe_mask);
owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
memcpy(dest, cqe, sizeof *cqe);
dest->owner_sr_opcode = owner_bit |
diff --git a/src/mlx4.h b/src/mlx4.h
index 09e2bdd..707061b 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -216,6 +216,7 @@ struct mlx4_cq {
uint32_t *set_ci_db;
uint32_t *arm_db;
int arm_sn;
+ uint32_t cqe_mask;
};
struct mlx4_srq {
diff --git a/src/verbs.c b/src/verbs.c
index 059b534..d2a15d5 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -168,11 +168,22 @@ struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
struct mlx4_create_cq_resp resp;
struct mlx4_cq *cq;
int ret;
+ struct mlx4_context *mctx = to_mctx(context);
+ int no_spare_cqe = 0;
/* Sanity check CQ size before proceeding */
- if (cqe > 0x3fffff)
+ if (cqe < 1 || cqe > mctx->max_cqe)
return NULL;
+ /* if max allowable cqes is a power-of-2, no spare cqe fix is in
+ * the kernel
+ */
+ if (mctx->max_cqe == align_queue_size(mctx->max_cqe))
+ no_spare_cqe = 1;
+
+ /* raise minimum, to avoid breaking ABI */
+ cqe = (cqe == 1) ? 2 : cqe;
+
cq = malloc(sizeof *cq);
if (!cq)
return NULL;
@@ -182,7 +193,7 @@ struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE))
goto err;
- cqe = align_queue_size(cqe + 1);
+ cqe = align_queue_size(cqe);
if (mlx4_alloc_buf(&cq->buf, cqe * MLX4_CQ_ENTRY_SIZE,
to_mdev(context->device)->page_size))
@@ -209,6 +220,9 @@ struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
goto err_db;
cq->cqn = resp.cqn;
+ cq->cqe_mask = cq->ibv_cq.cqe;
+ if (no_spare_cqe)
+ cq->ibv_cq.cqe++;
return &cq->ibv_cq;
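(A closing aside on the cq.c hunks above: cqe_mask works because the
CQE buffer is always sized to a power of two, so "n & cqe_mask" wraps
a monotonically increasing index into a buffer slot, and the bit just
above the mask -- "n & (cqe_mask + 1)" in get_sw_cqe() -- flips on
every wrap, which is what the ownership test compares against. A toy
demonstration with hypothetical names, not libmlx4 code:)

#include <stdio.h>

#define RING_SIZE 8			/* must be a power of two */
#define RING_MASK (RING_SIZE - 1)

int main(void)
{
	unsigned int n;

	/* The slot index wraps via the mask; the bit just above the
	 * mask toggles each time the index wraps around the ring. */
	for (n = 5; n <= 11; ++n)
		printf("index %2u -> slot %u, wrap bit %u\n",
		       n, n & RING_MASK, !!(n & RING_SIZE));
	return 0;
}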