[openib-general] [PATCH] libmthca: optimize calls to htonl with constant parameter
Michael S. Tsirkin
mst at mellanox.co.il
Thu Feb 22 15:57:24 PST 2007
GCC seems to be unable to propagate constants across calls to htonl.
So it turns out to be worthwhile to replace htonl with
a hand-written macro when the parameter is a constant.
Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>
Signed-off-by: Ishai Rabinovitz <ishai at mellanox.co.il>
---
Roland, I'm looking at micro-optimizing libmthca/mthca some more.
The following optimization is minor, but it seems quite safe.
What do you think? Tested with gcc 4.0.3.
diff --git a/src/cq.c b/src/cq.c
index 0aeb7a9..9428f74 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -275,7 +275,7 @@ static int handle_error_cqe(struct mthca_cq *cq,
* doorbell count field. In that case we always free the CQE.
*/
if (mthca_is_memfree(cq->ibv_cq.context) ||
- !(new_wqe & htonl(0x3f)) || (!cqe->db_cnt && dbd))
+ !(new_wqe & CONSTANT_HTONL(0x3f)) || (!cqe->db_cnt && dbd))
return 0;
cqe->db_cnt = htons(ntohs(cqe->db_cnt) - dbd);
diff --git a/src/mthca.h b/src/mthca.h
index 1f31bc3..798029f 100644
--- a/src/mthca.h
+++ b/src/mthca.h
@@ -112,6 +112,20 @@ enum {
MTHCA_OPCODE_INVALID = 0xff
};
+/* Network-order form of a 32-bit constant x; x is expanded four times, so it must have no side
+ * effects. GCC does not seem to be able to do constant propagation across htonl/ntohl calls */
+#if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+#define CONSTANT_HTONL(x) \
+ ((((unsigned)(x)) >> 24) | \
+ ((((unsigned)(x)) >> 8) & 0xff00) | \
+ ((((unsigned)(x)) << 8) & 0xff0000) | \
+ (((unsigned)(x)) << 24))
+#elif defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN
+#define CONSTANT_HTONL(x) (x)
+#else /* byte order not known at preprocessing time: let htonl() handle it */
+#define CONSTANT_HTONL(x) htonl(x)
+#endif
+
struct mthca_ah_page;
struct mthca_device {
diff --git a/src/qp.c b/src/qp.c
index f2483e9..85d3385 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -138,10 +138,10 @@ int mthca_tavor_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
((struct mthca_next_seg *) wqe)->ee_nds = 0;
((struct mthca_next_seg *) wqe)->flags =
((wr->send_flags & IBV_SEND_SIGNALED) ?
- htonl(MTHCA_NEXT_CQ_UPDATE) : 0) |
+ CONSTANT_HTONL(MTHCA_NEXT_CQ_UPDATE) : 0) |
((wr->send_flags & IBV_SEND_SOLICITED) ?
- htonl(MTHCA_NEXT_SOLICIT) : 0) |
- htonl(1);
+ CONSTANT_HTONL(MTHCA_NEXT_SOLICIT) : 0) |
+ CONSTANT_HTONL(1);
if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
@@ -359,9 +359,9 @@ int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
((struct mthca_next_seg *) wqe)->nda_op = 0;
((struct mthca_next_seg *) wqe)->ee_nds =
- htonl(MTHCA_NEXT_DBD);
+ CONSTANT_HTONL(MTHCA_NEXT_DBD);
((struct mthca_next_seg *) wqe)->flags =
- htonl(MTHCA_NEXT_CQ_UPDATE);
+ CONSTANT_HTONL(MTHCA_NEXT_CQ_UPDATE);
wqe += sizeof (struct mthca_next_seg);
size = sizeof (struct mthca_next_seg) / 16;
@@ -505,10 +505,10 @@ int mthca_arbel_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
((struct mthca_next_seg *) wqe)->flags =
((wr->send_flags & IBV_SEND_SIGNALED) ?
- htonl(MTHCA_NEXT_CQ_UPDATE) : 0) |
+ CONSTANT_HTONL(MTHCA_NEXT_CQ_UPDATE) : 0) |
((wr->send_flags & IBV_SEND_SOLICITED) ?
- htonl(MTHCA_NEXT_SOLICIT) : 0) |
- htonl(1);
+ CONSTANT_HTONL(MTHCA_NEXT_SOLICIT) : 0) |
+ CONSTANT_HTONL(1);
if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
@@ -750,7 +750,7 @@ int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
if (i < qp->rq.max_gs) {
((struct mthca_data_seg *) wqe)->byte_count = 0;
- ((struct mthca_data_seg *) wqe)->lkey = htonl(MTHCA_INVAL_LKEY);
+ ((struct mthca_data_seg *) wqe)->lkey = CONSTANT_HTONL(MTHCA_INVAL_LKEY);
((struct mthca_data_seg *) wqe)->addr = 0;
}
@@ -872,7 +872,7 @@ int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
for (scatter = (void *) (next + 1);
(void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);
++scatter)
- scatter->lkey = htonl(MTHCA_INVAL_LKEY);
+ scatter->lkey = CONSTANT_HTONL(MTHCA_INVAL_LKEY);
}
for (i = 0; i < qp->sq.max; ++i) {
@@ -956,10 +956,10 @@ int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
else
next = get_recv_wqe(qp, index);
- *dbd = !!(next->ee_nds & htonl(MTHCA_NEXT_DBD));
- if (next->ee_nds & htonl(0x3f))
- *new_wqe = (next->nda_op & htonl(~0x3f)) |
- (next->ee_nds & htonl(0x3f));
+ *dbd = !!(next->ee_nds & CONSTANT_HTONL(MTHCA_NEXT_DBD));
+ if (next->ee_nds & CONSTANT_HTONL(0x3f))
+ *new_wqe = (next->nda_op & CONSTANT_HTONL(~0x3f)) |
+ (next->ee_nds & CONSTANT_HTONL(0x3f));
else
*new_wqe = 0;
diff --git a/src/srq.c b/src/srq.c
index f9fc006..e27c8dc 100644
--- a/src/srq.c
+++ b/src/srq.c
@@ -142,7 +142,7 @@ int mthca_tavor_post_srq_recv(struct ibv_srq *ibsrq,
if (i < srq->max_gs) {
((struct mthca_data_seg *) wqe)->byte_count = 0;
- ((struct mthca_data_seg *) wqe)->lkey = htonl(MTHCA_INVAL_LKEY);
+ ((struct mthca_data_seg *) wqe)->lkey = CONSTANT_HTONL(MTHCA_INVAL_LKEY);
((struct mthca_data_seg *) wqe)->addr = 0;
}
@@ -150,7 +150,7 @@ int mthca_tavor_post_srq_recv(struct ibv_srq *ibsrq,
htonl((ind << srq->wqe_shift) | 1);
wmb();
((struct mthca_next_seg *) prev_wqe)->ee_nds =
- htonl(MTHCA_NEXT_DBD);
+ CONSTANT_HTONL(MTHCA_NEXT_DBD);
srq->wrid[ind] = wr->wr_id;
srq->first_free = next_ind;
@@ -247,7 +247,7 @@ int mthca_arbel_post_srq_recv(struct ibv_srq *ibsrq,
if (i < srq->max_gs) {
((struct mthca_data_seg *) wqe)->byte_count = 0;
- ((struct mthca_data_seg *) wqe)->lkey = htonl(MTHCA_INVAL_LKEY);
+ ((struct mthca_data_seg *) wqe)->lkey = CONSTANT_HTONL(MTHCA_INVAL_LKEY);
((struct mthca_data_seg *) wqe)->addr = 0;
}
@@ -313,7 +313,7 @@ int mthca_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
for (scatter = wqe + sizeof (struct mthca_next_seg);
(void *) scatter < wqe + (1 << srq->wqe_shift);
++scatter)
- scatter->lkey = htonl(MTHCA_INVAL_LKEY);
+ scatter->lkey = CONSTANT_HTONL(MTHCA_INVAL_LKEY);
}
srq->first_free = 0;
--
MST
More information about the general
mailing list