[ofa-general] Re: send max_sge lower than reported by ib_query_device

Roland Dreier rdreier at cisco.com
Thu Sep 27 14:46:45 PDT 2007


 > The same bug exists with mthca.  I saw it originally in the kernel doing RDS work, but I just put together a short user space test.

Thanks.  The patch below seems to fix this for me.  I guess I'll queue
this for 2.6.24.

I'm also including the test program I wrote to verify this; mlx4 and
mthca seem OK on my system now.

diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 60de6f9..0c22cf0 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -45,6 +45,7 @@
 #include "mthca_cmd.h"
 #include "mthca_profile.h"
 #include "mthca_memfree.h"
+#include "mthca_wqe.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
@@ -205,7 +206,20 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
 	mdev->limits.gid_table_len  	= dev_lim->max_gids;
 	mdev->limits.pkey_table_len 	= dev_lim->max_pkeys;
 	mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
-	mdev->limits.max_sg             = dev_lim->max_sg;
+	/*
+	 * Reduce max_sg to a value so that all possible send requests
+	 * will fit into max_desc_sz; send requests will need a next
+	 * segment plus possibly another extra segment, and the UD
+	 * segment is the biggest extra segment.
+	 */
+	mdev->limits.max_sg             =
+		min_t(int, dev_lim->max_sg,
+		      (dev_lim->max_desc_sz -
+		       (sizeof (struct mthca_next_seg) +
+			(mthca_is_memfree(mdev) ?
+			 sizeof (struct mthca_arbel_ud_seg) :
+			 sizeof (struct mthca_tavor_ud_seg)))) /
+		      sizeof (struct mthca_data_seg));
 	mdev->limits.max_wqes           = dev_lim->max_qp_sz;
 	mdev->limits.max_qp_init_rdma   = dev_lim->max_requester_per_qp;
 	mdev->limits.reserved_qps       = dev_lim->reserved_qps;


---

Here's the test program:

#include <stdio.h>
#include <string.h>

#include <infiniband/verbs.h>

/*
 * For each IB device, try to create one QP of each type (RC, UC, UD)
 * with max_send_sge/max_recv_sge set to the max_sge value reported by
 * ibv_query_device().  A driver that reports an unusable max_sge will
 * print FAILED for the affected QP types.
 */
int main(int argc, char *argv[])
{
	struct ibv_device      **dev_list;
	struct ibv_device      **dev;
	struct ibv_device_attr	 dev_attr;
	struct ibv_context	*context;
	struct ibv_pd		*pd;
	struct ibv_cq		*cq;
	struct ibv_qp		*qp;
	struct ibv_qp_init_attr  qp_attr;
	int			 t;
	static const struct {
		enum ibv_qp_type type;
		char            *name;
	}			 type_tab[] = {
		{ IBV_QPT_RC, "RC" },
		{ IBV_QPT_UC, "UC" },
		{ IBV_QPT_UD, "UD" },
	};

	(void) argc;
	(void) argv;

	dev_list = ibv_get_device_list(NULL);
	if (!dev_list) {
		printf("No IB devices found\n");
		return 1;
	}

	/* Iterate with a separate cursor so dev_list can be freed later. */
	for (dev = dev_list; *dev; ++dev) {
		printf("%s:\n", ibv_get_device_name(*dev));

		context = ibv_open_device(*dev);
		if (!context) {
			printf("  ibv_open_device failed\n");
			continue;
		}

		if (ibv_query_device(context, &dev_attr)) {
			printf("  ibv_query_device failed\n");
			goto close_dev;
		}

		cq = ibv_create_cq(context, 1, NULL, NULL, 0);
		if (!cq) {
			printf("  ibv_create_cq failed\n");
			goto close_dev;
		}

		pd = ibv_alloc_pd(context);
		if (!pd) {
			printf("  ibv_alloc_pd failed\n");
			goto destroy_cq;
		}

		for (t = 0; t < (int) (sizeof type_tab / sizeof type_tab[0]); ++t) {
			memset(&qp_attr, 0, sizeof qp_attr);

			qp_attr.send_cq = cq;
			qp_attr.recv_cq = cq;
			qp_attr.cap.max_send_wr = 1;
			qp_attr.cap.max_recv_wr = 1;
			/* The point of the test: ask for the full reported max_sge. */
			qp_attr.cap.max_send_sge = dev_attr.max_sge;
			qp_attr.cap.max_recv_sge = dev_attr.max_sge;
			qp_attr.qp_type = type_tab[t].type;

			printf("  %s: SGE %d ", type_tab[t].name, dev_attr.max_sge);

			qp = ibv_create_qp(pd, &qp_attr);
			if (qp) {
				printf("ok\n");
				ibv_destroy_qp(qp);
			} else
				printf("FAILED\n");
		}

		ibv_dealloc_pd(pd);
	destroy_cq:
		ibv_destroy_cq(cq);
	close_dev:
		ibv_close_device(context);
	}

	ibv_free_device_list(dev_list);

	return 0;
}



More information about the general mailing list