[ofa-general] [PATCH RFC] rds: add iwarp support

Jon Mason jon at opengridcomputing.com
Tue Jul 8 13:27:55 PDT 2008


Hey Olaf,
With the patch below on top of your previous patch, I am able to get it
working over iWARP (though RDMA is still breaking, but in a new and
different way).

The patch modifies rds_ib_laddr_check to use rdma_bind_addr to determine
if it is a IB/iWARP device by creating a temporary rdma_cm_id and
binding to the address passed in.  I'm sure this can be optimized, but
I'll leave that as a todo for later.

This patch reduces the default send_wr and recv_wr to sizes that Chelsio
can handle, as the sizes before were too large.  Optimally, this should
be determined with a query of the device.  I have added that to my todo
list as well

The lguid and fguid cannot be used for iWARP as those structs do not
exists and give off a nasty illegal pointer deference.  I removed the
references to those, as the appear to only be for debug messages.

Lastly, this has the port space differentiation (which is the only way
TCP and iWARP can happily co-exist).

Let me know what you think.

Thanks,
Jon

Signed-off-by: Jon Mason <jon at opengridcomputing.com>

diff --git a/net/rds/ib.c b/net/rds/ib.c
index cd2dc7c..067fab0 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -158,17 +158,29 @@ void rds_ib_remove_one(struct ib_device *device)
  */
 static int rds_ib_laddr_check(__be32 addr)
 {
-	struct net_device *dev;
-	int ret;
+	int ret = -EADDRNOTAVAIL;
+	struct rdma_cm_id *cm_id;
+	struct sockaddr_in sin;
+
+	cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
+	if (cm_id == NULL) {
+		printk("rdma_create_id failed\n");
+		return -EADDRNOTAVAIL;
+	}
 
-	dev = ip_dev_find(addr);
-	if (dev && dev->type == ARPHRD_INFINIBAND) {
-		dev_put(dev);
-		ret = 0;
-	} else
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = addr;
+
+	/* rdma_bind_addr will fail for non-IB/iWARP devices */
+	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
+        if (ret)
 		ret = -EADDRNOTAVAIL;
 
-	rdsdebug("addr %u.%u.%u.%u ret %d\n", NIPQUAD(addr), ret);
+	rdsdebug("addr %u.%u.%u.%u ret %d node type %d\n",
+		 NIPQUAD(addr), ret, cm_id->device?cm_id->device->node_type:-1);
+
+	rdma_destroy_id(cm_id);
 
 	return ret;
 }
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 947977c..2924fda 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -7,14 +7,16 @@
 
 #define RDS_IB_RESOLVE_TIMEOUT_MS	5000
 
-#define RDS_FMR_SIZE			256
+/* FIXME - use ib_query_device to determine proper value */
+#define RDS_FMR_SIZE			20 
 #define RDS_FMR_POOL_SIZE		2048
 
-#define RDS_IB_MAX_SGE			8
+#define RDS_IB_MAX_SGE		8
 #define RDS_IB_RECV_SGE 		2
 
-#define RDS_IB_DEFAULT_RECV_WR		1024
-#define RDS_IB_DEFAULT_SEND_WR		256
+/* FIXME - call ib_query_device to determine sane values for this based on the HW */
+#define RDS_IB_DEFAULT_RECV_WR	512
+#define RDS_IB_DEFAULT_SEND_WR	128
 
 #define RDS_IB_SUPPORTED_PROTOCOLS	0x00000003	/* minor versions supported */
 
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index a49e394..fd56481 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -296,7 +296,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	 */
 	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
 	if (ret) {
-		rdsdebug("ib_req_notify_cq failed: %d\n", ret);
+		rdsdebug("rdma_create_qp failed: %d\n", ret);
 		goto out;
 	}
 
@@ -388,8 +388,6 @@ static u32 rds_ib_protocol_compatible(const struct rds_ib_connect_private *dp)
 static int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 				    struct rdma_cm_event *event)
 {
-	__be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
-	__be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
 	const struct rds_ib_connect_private *dp = event->param.conn.private_data;
 	struct rds_ib_connect_private dp_rep;
 	struct rds_connection *conn = NULL;
@@ -404,11 +402,9 @@ static int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 	if (!version)
 		goto out;
 
-	rdsdebug("saddr %u.%u.%u.%u daddr %u.%u.%u.%u RDSv%u.%u lguid 0x%llx fguid "
-		 "0x%llx\n", NIPQUAD(dp->dp_saddr), NIPQUAD(dp->dp_daddr),
-		 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
-		 (unsigned long long)be64_to_cpu(lguid),
-		 (unsigned long long)be64_to_cpu(fguid));
+	rdsdebug("saddr %u.%u.%u.%u daddr %u.%u.%u.%u RDSv%u.%u\n",
+		 NIPQUAD(dp->dp_saddr), NIPQUAD(dp->dp_daddr),
+		 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
 
 	conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
 			       GFP_KERNEL);
@@ -628,7 +624,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
 
 	dest.sin_family = AF_INET;
 	dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
-	dest.sin_port = (__force u16)htons(RDS_PORT);
+	dest.sin_port = (__force u16)htons(RDS_IB_PORT);
 
 	ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
 				(struct sockaddr *)&dest,
@@ -813,7 +809,7 @@ int __init rds_ib_listen_init(void)
 
 	sin.sin_family = PF_INET,
 	sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
-	sin.sin_port = (__force u16)htons(RDS_PORT);
+	sin.sin_port = (__force u16)htons(RDS_IB_PORT);
 
 	/*
 	 * XXX I bet this binds the cm_id to a device.  If we want to support
@@ -833,7 +829,7 @@ int __init rds_ib_listen_init(void)
 		goto out;
 	}
 
-	rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT);
+	rdsdebug("cm %p listening on port %u\n", cm_id, RDS_IB_PORT);
 
 	rds_ib_listen_id = cm_id;
 	cm_id = NULL;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 03031e2..6c17a4d 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -26,8 +26,8 @@
  *
  * port 18633 was the version that had ack frames on the wire.
  */
-#define RDS_PORT	18634
-
+#define RDS_TCP_PORT	18634
+#define RDS_IB_PORT	18635
 
 #ifndef AF_RDS
 #define AF_RDS          28      /* Reliable Datagram Socket     */
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 0389a99..298e372 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -96,7 +96,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
 
 	dest.sin_family = AF_INET;
 	dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
-	dest.sin_port = (__force u16)htons(RDS_PORT);
+	dest.sin_port = (__force u16)htons(RDS_TCP_PORT);
 
 	/* 
 	 * once we call connect() we can start getting callbacks and they
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index caeacbe..50709b7 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -159,7 +159,7 @@ int __init rds_tcp_listen_init(void)
 
 	sin.sin_family = PF_INET,
 	sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
-	sin.sin_port = (__force u16)htons(RDS_PORT);
+	sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
 
 	ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
 	if (ret < 0)



More information about the general mailing list