[openib-general] [PATCH] rdma_lat-09 and results

Grant Grundler iod00d at hp.com
Wed Jun 8 16:45:32 PDT 2005


On Wed, Jun 01, 2005 at 05:52:28PM -0700, Grant Grundler wrote:
> Michael,
> 
> Good news:
> 	My next cleanup of rdma_lat.c is working and patch is appended.

Michael,
Did you want to comment on this patch or apply it?
I can resend privately if you don't have the original mail.

> 	Summary of changes below.
> 
> Bad News:
> 	perf is about ~15 cycles slower since the last time I tested.
> 	(Hrm...maybe it's time to cycle power on the TS90 switch again.)

Seems the discussion on perf was distracting from the patch.
I'll separate those next time.

thanks,
grant

> Here's with the new rdma_lat.c:
> grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat  -C
>    local address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
>   remote address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
> Latency typical: 7140 cycles
> Latency best   : 6915 cycles
> Latency worst  : 52915.5 cycles
> grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$ 
> 
> And the "client" side:
> grundler at iota:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat -C 10.0.0.51
>    local address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
>   remote address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
> Latency typical: 7140 cycles
> Latency best   : 6907 cycles
> Latency worst  : 94920 cycles
> 
> 
> The previous set of rdma_lat results are here:
>     http://openib.org/pipermail/openib-general/2005-May/006721.html
> 
> I'll guess the previous SVN version was no older than r2229.
> 
> 
> I get 7140 to 7151 for the original rdma_lat.   Usually 7147.5.
> I get 7132 to 7155 with my version of rdma_lat. Usually 7140.
> No statistically significant differences.
> Both essentially agree on the higher result.
> Using "-n 10000" gave more consistent results *
> 
> I use "taskset" to bind the rdma_lat test to a CPU.
> But it didn't matter which CPU I bound the task to - results
> were basically the same.  I suspect the "stream" mode just
> does not depend on or generate that many interrupts.
> 
> 
> diffstat rdma_lat.c-09-diff 
>  rdma_lat.c |  395 +++++++++++++++++++++++++++++--------------------------------
>  1 files changed, 188 insertions(+), 207 deletions(-)
> 
> Commit Log entry/Summary of changes:
> 	o move device lookup from main() to pp_find_dev()
> 	o move sockfd handling code to pp_open_port()
> 	o consolidate server/client "key exchange" code path
> 	o enumerate return values in main()
> 	o fixed nit: pp_*_exch_dest was called twice.
> 	  Each time it would malloc a new "rem_dest".
> 	  Code in pp_open_port() now free()'s the first one.
> 
> Signed-off-by: Grant Grundler <iod00d at hp.com>
> 
> thanks,
> grant
> 
> 
> 
> Index: rdma_lat.c
> ===================================================================
> --- rdma_lat.c	(revision 2519)
> +++ rdma_lat.c	(working copy)
> @@ -103,30 +103,40 @@ static uint16_t pp_get_local_lid(struct 
>  	return attr.lid;
>  }
>  
> -static int pp_client_connect(const char *servername, int port)
> +
> +static int pp_connect_sock(const char *servername, int port)
>  {
>  	struct addrinfo *res, *t;
>  	struct addrinfo hints = {
> +		.ai_flags    = AI_PASSIVE,	/* Server only? */
>  		.ai_family   = AF_UNSPEC,
>  		.ai_socktype = SOCK_STREAM
>  	};
>  	char *service;
> -	int n;
>  	int sockfd = -1;
> +	int n;
>  
>  	asprintf(&service, "%d", port);
>  	n = getaddrinfo(servername, service, &hints, &res);
>  
>  	if (n < 0) {
> -		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
> +		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n),
> +				 servername ? servername : "NULL", port);
>  		return n;
>  	}
>  
>  	for (t = res; t; t = t->ai_next) {
>  		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
>  		if (sockfd >= 0) {
> -			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
> -				break;
> +			if (servername) {
> +				if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
> +					break;
> +			} else {
> +				setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
> +				if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
> +					break;
> +			}
> +
>  			close(sockfd);
>  			sockfd = -1;
>  		}
> @@ -134,147 +144,100 @@ static int pp_client_connect(const char 
>  
>  	freeaddrinfo(res);
>  
> -	if (sockfd < 0) {
> -		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
> -		return sockfd;
> +	if (sockfd < 0)
> +		fprintf(stderr, "Couldn't %s to %s:%d\n",
> +				 servername ? "connect" : "bind",
> +				 servername ? servername : "NULL", port);
> +
> +	/* "server" side needs to wait/listen for client to call */
> +	if (!servername) {
> +		int connfd;
> +
> +		listen(sockfd, 1);
> +		connfd = accept(sockfd, NULL, 0);
> +		if (connfd < 0) {
> +			perror("server accept");
> +			fprintf(stderr, "accept() failed\n");
> +		}
> +		close(sockfd);
> +		return connfd;
>  	}
> +
>  	return sockfd;
>  }
>  
> -struct pingpong_dest * pp_client_exch_dest(int sockfd,
> -					   const struct pingpong_dest *my_dest)
> +#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")
> +#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"
> +
> +static int pp_write_keys(int sockfd, const struct pingpong_dest *my_dest)
>  {
> -	struct pingpong_dest *rem_dest = NULL;
> -	char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
> -	int parsed;
> +	char msg[KEY_MSG_SIZE];
> +
> +	sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,
> +			my_dest->psn, my_dest->rkey, my_dest->vaddr);
>  
> -	sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
> -			my_dest->psn,my_dest->rkey,my_dest->vaddr);
>  	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
>  		perror("client write");
>  		fprintf(stderr, "Couldn't send local address\n");
> -		goto out;
> +		return 0;
>  	}
>  
> +	return 1;
> +}
> +
> +static struct pingpong_dest * pp_read_keys(int sockfd, const struct pingpong_dest *my_dest)
> +{
> +	struct pingpong_dest *rem_dest = NULL;
> +	int parsed;
> +	char msg[KEY_MSG_SIZE];
> +
>  	if (read(sockfd, msg, sizeof msg) != sizeof msg) {
> -		perror("client read");
> +		perror("pp_read_keys");
>  		fprintf(stderr, "Couldn't read remote address\n");
> -		goto out;
> +		return NULL;
>  	}
>  
>  	rem_dest = malloc(sizeof *rem_dest);
>  	if (!rem_dest)
> -		goto out;
> +		return NULL;
>  
> -	parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
> -			&rem_dest->psn,&rem_dest->rkey,&rem_dest->vaddr);
> +	parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,
> +			&rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
>  
>  	if (parsed != 5) {
> -		fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
> -				msg);
> +		fprintf(stderr, "Couldn't parse line <%.*s>\n",
> +				(int)sizeof msg, msg);
>  		free(rem_dest);
> -		rem_dest = NULL;
> -		goto out;
> -	}
> -out:
> -	return rem_dest;
> -}
> -
> -int pp_server_connect(int port)
> -{
> -	struct addrinfo *res, *t;
> -	struct addrinfo hints = {
> -		.ai_flags    = AI_PASSIVE,
> -		.ai_family   = AF_UNSPEC,
> -		.ai_socktype = SOCK_STREAM
> -	};
> -	char *service;
> -	int sockfd = -1, connfd;
> -	int n;
> -
> -	asprintf(&service, "%d", port);
> -	n = getaddrinfo(NULL, service, &hints, &res);
> -
> -	if (n < 0) {
> -		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
> -		return n;
> -	}
> -
> -	for (t = res; t; t = t->ai_next) {
> -		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
> -		if (sockfd >= 0) {
> -			n = 1;
> -
> -			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
> -
> -			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
> -				break;
> -			close(sockfd);
> -			sockfd = -1;
> -		}
> -	}
> -
> -	freeaddrinfo(res);
> -
> -	if (sockfd < 0) {
> -		fprintf(stderr, "Couldn't listen to port %d\n", port);
> -		return sockfd;
> -	}
> -
> -	listen(sockfd, 1);
> -	connfd = accept(sockfd, NULL, 0);
> -	if (connfd < 0) {
> -		perror("server accept");
> -		fprintf(stderr, "accept() failed\n");
> -		close(sockfd);
> -		return connfd;
> +		return NULL;
>  	}
>  
> -	close(sockfd);
> -	return connfd;
> +	return rem_dest;
>  }
>  
> -static struct pingpong_dest *pp_server_exch_dest(int connfd, const struct pingpong_dest *my_dest)
> +static struct pingpong_dest * pp_exch_dest(int sockfd, const char *servername,
> +					const struct pingpong_dest *my_dest)
>  {
> -	char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
>  	struct pingpong_dest *rem_dest = NULL;
> -	int parsed;
> -	int n;
> -
> -	n = read(connfd, msg, sizeof msg);
> -	if (n != sizeof msg) {
> -		perror("server read");
> -		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
> -		goto out;
> -	}
>  
> -	rem_dest = malloc(sizeof *rem_dest);
> -	if (!rem_dest)
> -		goto out;
> -
> -	parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
> -			&rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
> -	if (parsed != 5) {
> -		fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
> -				msg);
> -		free(rem_dest);
> -		rem_dest = NULL;
> -		goto out;
> +	if (servername) {
> +		if (!pp_write_keys(sockfd, my_dest))
> +			goto exch_failed;
> +		rem_dest = pp_read_keys(sockfd, my_dest);
> +		if (!rem_dest)
> +			goto exch_failed;
> +	} else {
> +		rem_dest = pp_read_keys(sockfd, my_dest);
> +		if (!rem_dest)
> +			goto exch_failed;
> +		if (!pp_write_keys(sockfd, my_dest))
> +			goto exch_failed;
>  	}
>  
> -	sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
> -			my_dest->psn, my_dest->rkey, my_dest->vaddr);
> -	if (write(connfd, msg, sizeof msg) != sizeof msg) {
> -		perror("server write");
> -		fprintf(stderr, "Couldn't send local address\n");
> -		free(rem_dest);
> -		rem_dest = NULL;
> -		goto out;
> -	}
> -out:
> +exch_failed:
>  	return rem_dest;
>  }
>  
> +
>  static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
>  					    int tx_depth, int port)
>  {
> @@ -424,6 +387,86 @@ static int pp_connect_ctx(struct pingpon
>  	return 0;
>  }
>  
> +static struct ibv_device * pp_find_dev( const char *ib_devname)
> +{
> +	struct dlist	*dev_list;
> +	struct ibv_device *ib_dev = NULL;
> +
> +	dev_list = ibv_get_devices();
> +
> +	dlist_start(dev_list);
> +	if (!ib_devname) {
> +		ib_dev = dlist_next(dev_list);
> +		if (!ib_dev)
> +			fprintf(stderr, "No IB devices found\n");
> +	} else {
> +		dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
> +			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
> +				break;
> +		if (!ib_dev)
> +			fprintf(stderr, "IB device %s not found\n", ib_devname);
> +	}
> +	return ib_dev;
> +}
> +
> +
> +static struct pingpong_dest * pp_open_port(struct pingpong_context *ctx,
> +	const char * servername, int ib_port, int port)
> +{
> +	char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";
> +	struct pingpong_dest	my_dest;
> +	struct pingpong_dest	*rem_dest;
> +	int			sockfd;
> +
> +
> +	/* Create connection between client and server.
> +	 * We do it by exchanging data over a TCP socket connection. */
> +
> +	my_dest.lid = pp_get_local_lid(ctx, ib_port);
> +	my_dest.qpn = ctx->qp->qp_num;
> +	my_dest.psn = lrand48() & 0xffffff;
> +	if (!my_dest.lid) {
> +		fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
> +		return NULL;
> +	}
> +	my_dest.rkey = ctx->mr->rkey;
> +	my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
> +
> +	printf(addr_fmt, "local", my_dest.lid, my_dest.qpn, my_dest.psn,
> +			my_dest.rkey, my_dest.vaddr);
> +	
> +
> +	sockfd = pp_connect_sock(servername, port);
> +	if (sockfd < 0) {
> +		printf("pp_connect_sock(%s,%d) failed (%d)!\n",
> +					servername, port, sockfd);
> +		return NULL;
> +	}
> +
> +	rem_dest = pp_exch_dest(sockfd, servername, &my_dest);
> +	if (!rem_dest)
> +		return NULL;
> +
> +	printf(addr_fmt, "remote", rem_dest->lid, rem_dest->qpn, rem_dest->psn,
> +			rem_dest->rkey, rem_dest->vaddr);
> +
> +	if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
> +		return NULL;
> +
> +	/* An additional handshake is required *after* moving qp to RTR.
> +         * Arbitrarily reuse exch_dest for this purpose.
> +	 */
> +
> +	free(rem_dest);
> +	rem_dest = pp_exch_dest(sockfd, servername, &my_dest);
> +
> +	write(sockfd, "done", sizeof "done");
> +	close(sockfd);
> +
> +	return rem_dest;
> +}
> +
> +
>  static void usage(const char *argv0)
>  {
>  	printf("Usage:\n");
> @@ -518,27 +561,26 @@ static void print_report(struct report_o
>  
>  int main(int argc, char *argv[])
>  {
> -	struct dlist		*dev_list;
>  	struct ibv_device	*ib_dev;
>  	struct pingpong_context *ctx;
> -	struct pingpong_dest     my_dest;
>  	struct pingpong_dest    *rem_dest;
>  	char                    *ib_devname = NULL;
>  	char                    *servername = NULL;
> -	int                      port = 18515;
> -	int                      ib_port = 1;
> -	int                      size = 1;
> -	int                      tx_depth = 50;
> -	int                      iters = 1000;
> -	int                      scnt, rcnt, ccnt;
> -	int			 sockfd;
> +
>  	struct ibv_qp		*qp;
>  	struct ibv_send_wr	*wr;
>  	volatile char		*poll_buf;
>  	volatile char		*post_buf;
> -	struct report_options    report = {};
>  
> -	cycles_t	*tstamp;
> +	int			port = 18515;
> +	int			ib_port = 1;
> +	int			size = 1;
> +	int			iters = 1000;
> +        int			tx_depth = 50;
> +	int			scnt, rcnt, ccnt;
> +
> +	static struct report_options    report = {};
> +	static cycles_t	*tstamp;
>  
>  	/* Parameter parsing. */
>  	while (1) {
> @@ -578,25 +620,25 @@ int main(int argc, char *argv[])
>  			ib_port = strtol(optarg, NULL, 0);
>  			if (ib_port < 0) {
>  				usage(argv[0]);
> -				return 1;
> +				return 2;
>  			}
>  			break;
>  
>  		case 's':
>  			size = strtol(optarg, NULL, 0);
> -			if (size < 1) { usage(argv[0]); return 1; }
> +			if (size < 1) { usage(argv[0]); return 3; }
>  			break;
>  
>  		case 't':
>  			tx_depth = strtol(optarg, NULL, 0);
> -			if (tx_depth < 1) { usage(argv[0]); return 1; }
> +			if (tx_depth < 1) { usage(argv[0]); return 4; }
>  			break;
>  
>  		case 'n':
>  			iters = strtol(optarg, NULL, 0);
>  			if (iters < 2) {
>  				usage(argv[0]);
> -				return 1;
> +				return 5;
>  			}
>  
>  			break;
> @@ -615,7 +657,7 @@ int main(int argc, char *argv[])
>  
>  		default:
>  			usage(argv[0]);
> -			return 1;
> +			return 5;
>  		}
>  	}
>  
> @@ -623,90 +665,26 @@ int main(int argc, char *argv[])
>  		servername = strdupa(argv[optind]);
>  	else if (optind < argc) {
>  		usage(argv[0]);
> -		return 1;
> +		return 6;
>  	}
>  
>  
> -	/* Done with parameter parsing. Perform setup. */
> +	/*
> +	 *  Done with parameter parsing. Perform setup.
> +	 */
>  
>  	srand48(getpid() * time(NULL));
> -
>  	page_size = sysconf(_SC_PAGESIZE);
>  
> -	dev_list = ibv_get_devices();
> -
> -	dlist_start(dev_list);
> -	if (!ib_devname) {
> -		ib_dev = dlist_next(dev_list);
> -		if (!ib_dev) {
> -			fprintf(stderr, "No IB devices found\n");
> -			return 1;
> -		}
> -	} else {
> -		dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
> -			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
> -				break;
> -		if (!ib_dev) {
> -			fprintf(stderr, "IB device %s not found\n", ib_devname);
> -			return 1;
> -		}
> -	}
> +	ib_dev = pp_find_dev(ib_devname);
>  
>  	ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
>  	if (!ctx)
> -		return 1;
> -
> -	/* Create connection between client and server.
> -	 * We do it by exchanging data over a TCP socket connection. */
> -
> -	my_dest.lid = pp_get_local_lid(ctx, ib_port);
> -	my_dest.qpn = ctx->qp->qp_num;
> -	my_dest.psn = lrand48() & 0xffffff;
> -	if (!my_dest.lid) {
> -		fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
> -		return 1;
> -	}
> -	my_dest.rkey = ctx->mr->rkey;
> -	my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
> -
> -	printf("  local address:  LID %#04x, QPN %#06x, PSN %#06x "
> -			"RKey %#08x VAddr %#016Lx\n",
> -			my_dest.lid, my_dest.qpn, my_dest.psn,
> -			my_dest.rkey, my_dest.vaddr);
> -
> -	if (servername) {
> -		sockfd = pp_client_connect(servername, port);
> -		if (sockfd < 0)
> -			return 1;
> -		rem_dest = pp_client_exch_dest(sockfd, &my_dest);
> -	} else {
> -		sockfd = pp_server_connect(port);
> -		if (sockfd < 0)
> -			return 1;
> -		rem_dest = pp_server_exch_dest(sockfd, &my_dest);
> -	}
> +		return 7;
>  
> +	rem_dest = pp_open_port(ctx, servername, ib_port, port);
>  	if (!rem_dest)
> -		return 1;
> -
> -	printf("  remote address: LID %#04x, QPN %#06x, PSN %#06x, "
> -			"RKey %#08x VAddr %#016Lx\n",
> -			rem_dest->lid, rem_dest->qpn, rem_dest->psn,
> -			rem_dest->rkey, rem_dest->vaddr);
> -
> -	if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
> -		return 1;
> -
> -	/* An additional handshake is required *after* moving qp to RTR.
> -           Arbitrarily reuse exch_dest for this purpose. */
> -	if (servername) {
> -		rem_dest = pp_client_exch_dest(sockfd, &my_dest);
> -	} else {
> -		rem_dest = pp_server_exch_dest(sockfd, &my_dest);
> -	}
> -
> -	write(sockfd, "done", sizeof "done");
> -	close(sockfd);
> +		return 8;
>  
>  	wr = &ctx->wr;
>  	ctx->list.addr = (uintptr_t) ctx->buf;
> @@ -726,7 +704,7 @@ int main(int argc, char *argv[])
>  
>  	if (!tstamp) {
>  		perror("malloc");
> -		return 1;
> +		return 9;
>  	}
>  
>  	/* Done with setup. Start the test. */
> @@ -736,8 +714,8 @@ int main(int argc, char *argv[])
>  		/* Wait till buffer changes. */
>  		if (rcnt < iters && !(scnt < 1 && servername)) {
>  			++rcnt;
> -			while (*poll_buf != (char)rcnt) {
> -			}
> +			while (*poll_buf != (char)rcnt)
> +				;
>  			/* Here the data is already in the physical memory.
>  			   If we wanted to actually use it, we may need
>  			   a read memory barrier here. */
> @@ -751,7 +729,8 @@ int main(int argc, char *argv[])
>  			if (ibv_post_send(qp, wr, &bad_wr)) {
>  				fprintf(stderr, "Couldn't post send: scnt=%d\n",
>  					scnt);
> -				return 1;
> +				free(tstamp);
> +				return 10;
>  			}
>  		}
>  
> @@ -765,7 +744,8 @@ int main(int argc, char *argv[])
>  
>  			if (ne < 0) {
>  				fprintf(stderr, "poll CQ failed %d\n", ne);
> -				return 1;
> +				free(tstamp);
> +				return 11;
>  			}
>  			if (wc.status != IBV_WC_SUCCESS) {
>  				fprintf(stderr, "Completion wth error at %s:\n",
> @@ -774,7 +754,8 @@ int main(int argc, char *argv[])
>  					wc.status, (int) wc.wr_id);
>  				fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",
>  					scnt, rcnt, ccnt);
> -				return 1;
> +				free(tstamp);
> +				return 12;
>  			}
>  		}
>  	}
> _______________________________________________
> openib-general mailing list
> openib-general at openib.org
> http://openib.org/mailman/listinfo/openib-general
> 
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general



More information about the general mailing list