[ofa-general] Re: send max_sge lower than reported by ib_query_device
Roland Dreier
rdreier at cisco.com
Thu Sep 27 14:46:45 PDT 2007
> The same bug exists with mthca. I saw it originally in the kernel doing RDS work, but I just put together a short user space test.
Thanks. The patch below seems to fix this for me. I guess I'll queue
this for 2.6.24.
I'm also including the test program I wrote to verify this; mlx4 and
mthca seem OK on my system now.
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 60de6f9..0c22cf0 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -45,6 +45,7 @@
#include "mthca_cmd.h"
#include "mthca_profile.h"
#include "mthca_memfree.h"
+#include "mthca_wqe.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
@@ -205,7 +206,20 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
mdev->limits.gid_table_len = dev_lim->max_gids;
mdev->limits.pkey_table_len = dev_lim->max_pkeys;
mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
- mdev->limits.max_sg = dev_lim->max_sg;
+ /*
+ * Reduce max_sg to a value so that all possible send requests
+ * will fit into max_desc_sz; send requests will need a next
+ * segment plus possibly another extra segment, and the UD
+ * segment is the biggest extra segment.
+ */
+ mdev->limits.max_sg =
+ min_t(int, dev_lim->max_sg,
+ (dev_lim->max_desc_sz -
+ (sizeof (struct mthca_next_seg) +
+ (mthca_is_memfree(mdev) ?
+ sizeof (struct mthca_arbel_ud_seg) :
+ sizeof (struct mthca_tavor_ud_seg)))) /
+ sizeof (struct mthca_data_seg));
mdev->limits.max_wqes = dev_lim->max_qp_sz;
mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp;
mdev->limits.reserved_qps = dev_lim->reserved_qps;
---
Here's the test program:
#include <stdio.h>
#include <string.h>
#include <infiniband/verbs.h>
/*
 * Regression test for drivers reporting a max_sge (via ibv_query_device)
 * larger than what a send WQE can actually hold: try to create one QP of
 * each type (RC/UC/UD) with max_send_sge/max_recv_sge set to the reported
 * device maximum.  A buggy driver fails the UD (largest extra segment)
 * creation; a fixed driver prints "ok" for all three types.
 *
 * Returns 0 on completion, 1 if no IB devices are found.
 */
int main(int argc, char **argv)	/* was "char *argv" -- wrong type for main's second parameter */
{
	struct ibv_device      **dev_list;
	struct ibv_device      **dev;
	struct ibv_device_attr   dev_attr;
	struct ibv_context      *context;
	struct ibv_pd           *pd;
	struct ibv_cq           *cq;
	struct ibv_qp_init_attr  qp_attr;
	struct ibv_qp           *qp;
	int t;
	static const struct {
		enum ibv_qp_type type;
		char            *name;
	} type_tab[] = {
		{ IBV_QPT_RC, "RC" },
		{ IBV_QPT_UC, "UC" },
		{ IBV_QPT_UD, "UD" },
	};

	(void) argc;		/* command-line arguments are unused */
	(void) argv;

	dev_list = ibv_get_device_list(NULL);
	if (!dev_list) {
		printf("No IB devices found\n");
		return 1;
	}

	/*
	 * Iterate with a separate cursor so the original dev_list pointer
	 * stays valid for ibv_free_device_list() (the original advanced
	 * dev_list itself and leaked the list).
	 */
	for (dev = dev_list; *dev; ++dev) {
		printf("%s:\n", ibv_get_device_name(*dev));

		context = ibv_open_device(*dev);
		if (!context) {
			printf(" ibv_open_device failed\n");
			continue;
		}

		if (ibv_query_device(context, &dev_attr)) {
			printf(" ibv_query_device failed\n");
			goto close_dev;
		}

		cq = ibv_create_cq(context, 1, NULL, NULL, 0);
		if (!cq) {
			printf(" ibv_create_cq failed\n");
			goto close_dev;
		}

		pd = ibv_alloc_pd(context);
		if (!pd) {
			printf(" ibv_alloc_pd failed\n");
			goto destroy_cq;
		}

		for (t = 0; t < (int) (sizeof type_tab / sizeof type_tab[0]); ++t) {
			memset(&qp_attr, 0, sizeof qp_attr);
			qp_attr.send_cq          = cq;
			qp_attr.recv_cq          = cq;
			qp_attr.cap.max_send_wr  = 1;
			qp_attr.cap.max_recv_wr  = 1;
			/* The point of the test: ask for the full advertised SGE count. */
			qp_attr.cap.max_send_sge = dev_attr.max_sge;
			qp_attr.cap.max_recv_sge = dev_attr.max_sge;
			qp_attr.qp_type          = type_tab[t].type;

			printf(" %s: SGE %d ", type_tab[t].name, dev_attr.max_sge);

			qp = ibv_create_qp(pd, &qp_attr);
			if (qp) {
				printf("ok\n");
				ibv_destroy_qp(qp);	/* original leaked the QP */
			} else {
				printf("FAILED\n");
			}
		}

		/* goto-based cleanup: release per-device resources on all paths
		 * (the original leaked pd/cq/context on every iteration). */
		ibv_dealloc_pd(pd);
destroy_cq:
		ibv_destroy_cq(cq);
close_dev:
		ibv_close_device(context);
	}

	ibv_free_device_list(dev_list);

	return 0;
}
More information about the general
mailing list