[ofa-general] [PATCH RFC] rds: add iwarp support

Olaf Kirch olaf.kirch at oracle.com
Wed Jul 9 07:57:08 PDT 2008


On Wednesday 09 July 2008 16:34:41 Jon Mason wrote:
> After talking it over with Steve, the max_qp_wr returned from
> ib_query_device should tell the max size of each individual WR queue.
> I'll write a quick patch to do that and send it out shortly.

Don't worry, I have that already. I'll push my patches to my
git tree in an hour or so.

Olaf

> 
> Thanks,
> Jon
> 
> > 
> > > The lguid and fguid cannot be used for iWARP as those structs do not
> > > exists and give off a nasty illegal pointer deference.  I removed the
> > > references to those, as the appear to only be for debug messages.
> > 
> > Fine with me.
> > 
> > > Lastly, this has the port space differentiation (which is the only way
> > > TCP and iWARP can happily co-exist).
> > 
> > Okay.
> > 
> > Olaf
> > 
> > On Tuesday 08 July 2008 22:27:55 Jon Mason wrote:
> > > Hey Olaf,
> > > With the patch below on top of your previous patch, I am able to get it
> > > working over iWARP (though RDMA is still breaking, but in a new and
> > > different way).
> > > 
> > > The patch modifies rds_ib_laddr_check to use rdma_bind_addr to determine
> > > if it is a IB/iWARP device by creating a temporary rdma_cm_id and
> > > binding to the address passed in.  I'm sure this can be optimized, but
> > > I'll leave that as a todo for later.
> > > 
> > > This patch reduces the default send_wr and recv_wr to sizes that Chelsio
> > > can handle, as the sizes before were too large.  Optimally, this should
> > > be determined with a query of the device.  I have added that to my todo
> > > list as well
> > > 
> > > The lguid and fguid cannot be used for iWARP as those structs do not
> > > exists and give off a nasty illegal pointer deference.  I removed the
> > > references to those, as the appear to only be for debug messages.
> > > 
> > > Lastly, this has the port space differentiation (which is the only way
> > > TCP and iWARP can happily co-exist).
> > > 
> > > Let me know what you think.
> > > 
> > > Thanks,
> > > Jon
> > > 
> > > Signed-off-by: Jon Mason <jon at opengridcomputing.com>
> > > 
> > > diff --git a/net/rds/ib.c b/net/rds/ib.c
> > > index cd2dc7c..067fab0 100644
> > > --- a/net/rds/ib.c
> > > +++ b/net/rds/ib.c
> > > @@ -158,17 +158,29 @@ void rds_ib_remove_one(struct ib_device *device)
> > >   */
> > >  static int rds_ib_laddr_check(__be32 addr)
> > >  {
> > > -	struct net_device *dev;
> > > -	int ret;
> > > +	int ret = -EADDRNOTAVAIL;
> > > +	struct rdma_cm_id *cm_id;
> > > +	struct sockaddr_in sin;
> > > +
> > > +	cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
> > > +	if (cm_id == NULL) {
> > > +		printk("rdma_create_id failed\n");
> > > +		return -EADDRNOTAVAIL;
> > > +	}
> > >  
> > > -	dev = ip_dev_find(addr);
> > > -	if (dev && dev->type == ARPHRD_INFINIBAND) {
> > > -		dev_put(dev);
> > > -		ret = 0;
> > > -	} else
> > > +	memset(&sin, 0, sizeof(sin));
> > > +	sin.sin_family = AF_INET;
> > > +	sin.sin_addr.s_addr = addr;
> > > +
> > > +	/* rdma_bind_addr will fail for non-IB/iWARP devices */
> > > +	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
> > > +        if (ret)
> > >  		ret = -EADDRNOTAVAIL;
> > >  
> > > -	rdsdebug("addr %u.%u.%u.%u ret %d\n", NIPQUAD(addr), ret);
> > > +	rdsdebug("addr %u.%u.%u.%u ret %d node type %d\n",
> > > +		 NIPQUAD(addr), ret, cm_id->device?cm_id->device->node_type:-1);
> > > +
> > > +	rdma_destroy_id(cm_id);
> > >  
> > >  	return ret;
> > >  }
> > > diff --git a/net/rds/ib.h b/net/rds/ib.h
> > > index 947977c..2924fda 100644
> > > --- a/net/rds/ib.h
> > > +++ b/net/rds/ib.h
> > > @@ -7,14 +7,16 @@
> > >  
> > >  #define RDS_IB_RESOLVE_TIMEOUT_MS	5000
> > >  
> > > -#define RDS_FMR_SIZE			256
> > > +/* FIXME - use ib_query_device to determine proper value */
> > > +#define RDS_FMR_SIZE			20 
> > >  #define RDS_FMR_POOL_SIZE		2048
> > >  
> > > -#define RDS_IB_MAX_SGE			8
> > > +#define RDS_IB_MAX_SGE		8
> > >  #define RDS_IB_RECV_SGE 		2
> > >  
> > > -#define RDS_IB_DEFAULT_RECV_WR		1024
> > > -#define RDS_IB_DEFAULT_SEND_WR		256
> > > +/* FIXME - call ib_query_device to determine sane values for this based on the HW */
> > > +#define RDS_IB_DEFAULT_RECV_WR	512
> > > +#define RDS_IB_DEFAULT_SEND_WR	128
> > >  
> > >  #define RDS_IB_SUPPORTED_PROTOCOLS	0x00000003	/* minor versions supported */
> > >  
> > > diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
> > > index a49e394..fd56481 100644
> > > --- a/net/rds/ib_cm.c
> > > +++ b/net/rds/ib_cm.c
> > > @@ -296,7 +296,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
> > >  	 */
> > >  	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
> > >  	if (ret) {
> > > -		rdsdebug("ib_req_notify_cq failed: %d\n", ret);
> > > +		rdsdebug("rdma_create_qp failed: %d\n", ret);
> > >  		goto out;
> > >  	}
> > >  
> > > @@ -388,8 +388,6 @@ static u32 rds_ib_protocol_compatible(const struct rds_ib_connect_private *dp)
> > >  static int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
> > >  				    struct rdma_cm_event *event)
> > >  {
> > > -	__be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
> > > -	__be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
> > >  	const struct rds_ib_connect_private *dp = event->param.conn.private_data;
> > >  	struct rds_ib_connect_private dp_rep;
> > >  	struct rds_connection *conn = NULL;
> > > @@ -404,11 +402,9 @@ static int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
> > >  	if (!version)
> > >  		goto out;
> > >  
> > > -	rdsdebug("saddr %u.%u.%u.%u daddr %u.%u.%u.%u RDSv%u.%u lguid 0x%llx fguid "
> > > -		 "0x%llx\n", NIPQUAD(dp->dp_saddr), NIPQUAD(dp->dp_daddr),
> > > -		 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
> > > -		 (unsigned long long)be64_to_cpu(lguid),
> > > -		 (unsigned long long)be64_to_cpu(fguid));
> > > +	rdsdebug("saddr %u.%u.%u.%u daddr %u.%u.%u.%u RDSv%u.%u\n",
> > > +		 NIPQUAD(dp->dp_saddr), NIPQUAD(dp->dp_daddr),
> > > +		 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
> > >  
> > >  	conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
> > >  			       GFP_KERNEL);
> > > @@ -628,7 +624,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
> > >  
> > >  	dest.sin_family = AF_INET;
> > >  	dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
> > > -	dest.sin_port = (__force u16)htons(RDS_PORT);
> > > +	dest.sin_port = (__force u16)htons(RDS_IB_PORT);
> > >  
> > >  	ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
> > >  				(struct sockaddr *)&dest,
> > > @@ -813,7 +809,7 @@ int __init rds_ib_listen_init(void)
> > >  
> > >  	sin.sin_family = PF_INET,
> > >  	sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
> > > -	sin.sin_port = (__force u16)htons(RDS_PORT);
> > > +	sin.sin_port = (__force u16)htons(RDS_IB_PORT);
> > >  
> > >  	/*
> > >  	 * XXX I bet this binds the cm_id to a device.  If we want to support
> > > @@ -833,7 +829,7 @@ int __init rds_ib_listen_init(void)
> > >  		goto out;
> > >  	}
> > >  
> > > -	rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT);
> > > +	rdsdebug("cm %p listening on port %u\n", cm_id, RDS_IB_PORT);
> > >  
> > >  	rds_ib_listen_id = cm_id;
> > >  	cm_id = NULL;
> > > diff --git a/net/rds/rds.h b/net/rds/rds.h
> > > index 03031e2..6c17a4d 100644
> > > --- a/net/rds/rds.h
> > > +++ b/net/rds/rds.h
> > > @@ -26,8 +26,8 @@
> > >   *
> > >   * port 18633 was the version that had ack frames on the wire.
> > >   */
> > > -#define RDS_PORT	18634
> > > -
> > > +#define RDS_TCP_PORT	18634
> > > +#define RDS_IB_PORT	18635
> > >  
> > >  #ifndef AF_RDS
> > >  #define AF_RDS          28      /* Reliable Datagram Socket     */
> > > diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
> > > index 0389a99..298e372 100644
> > > --- a/net/rds/tcp_connect.c
> > > +++ b/net/rds/tcp_connect.c
> > > @@ -96,7 +96,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
> > >  
> > >  	dest.sin_family = AF_INET;
> > >  	dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
> > > -	dest.sin_port = (__force u16)htons(RDS_PORT);
> > > +	dest.sin_port = (__force u16)htons(RDS_TCP_PORT);
> > >  
> > >  	/* 
> > >  	 * once we call connect() we can start getting callbacks and they
> > > diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
> > > index caeacbe..50709b7 100644
> > > --- a/net/rds/tcp_listen.c
> > > +++ b/net/rds/tcp_listen.c
> > > @@ -159,7 +159,7 @@ int __init rds_tcp_listen_init(void)
> > >  
> > >  	sin.sin_family = PF_INET,
> > >  	sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
> > > -	sin.sin_port = (__force u16)htons(RDS_PORT);
> > > +	sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
> > >  
> > >  	ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
> > >  	if (ret < 0)
> > > _______________________________________________
> > > general mailing list
> > > general at lists.openfabrics.org
> > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
> > > 
> > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
> > > 
> > 
> > 
> > 
> > -- 
> > Olaf Kirch  |  --- o --- Nous sommes du soleil we love when we play
> > okir at lst.de |    / | \   sol.dhoop.naytheet.ah kin.ir.samse.qurax
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
> 
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
> 



-- 
Olaf Kirch  |  --- o --- Nous sommes du soleil we love when we play
okir at lst.de |    / | \   sol.dhoop.naytheet.ah kin.ir.samse.qurax



More information about the general mailing list