[openib-general] [PATCH] rdma_lat-09 and results

Grant Grundler iod00d at hp.com
Wed Jun 1 17:52:28 PDT 2005


Michael,

Good news:
	My next cleanup of rdma_lat.c is working and patch is appended.
	Summary of changes below.

Bad News:
	perf is about 15 cycles slower since the last time I tested.
	(Hrm...maybe it's time to cycle power on the TS90 switch again.)


Here's with the new rdma_lat.c:
grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat  -C
   local address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
  remote address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
Latency typical: 7140 cycles
Latency best   : 6915 cycles
Latency worst  : 52915.5 cycles
grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$ 

And the "client" side:
grundler at iota:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat -C 10.0.0.51
   local address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
  remote address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
Latency typical: 7140 cycles
Latency best   : 6907 cycles
Latency worst  : 94920 cycles


The previous set of rdma_lat results are here:
    http://openib.org/pipermail/openib-general/2005-May/006721.html

I'll guess the previous SVN version was no older than r2229.


I get 7140 to 7151 for the original rdma_lat.   Usually 7147.5.
I get 7132 to 7155 with my version of rdma_lat. Usually 7140.
No statistically significant differences.
Both essentially agree on the higher result.
Using "-n 10000" gave more consistent results *

I use "taskset" to bind the rdma_lat test to a CPU.
But it didn't matter which CPU I bound the task to - results
were basically the same.  I suspect the "stream" mode just
does not depend on or generate that many interrupts.


diffstat rdma_lat.c-09-diff 
 rdma_lat.c |  395 +++++++++++++++++++++++++++++--------------------------------
 1 files changed, 188 insertions(+), 207 deletions(-)

Commit Log entry/Summary of changes:
	o move device lookup from main() to pp_find_dev()
	o move sockfd handling code to pp_open_port()
	o consolidate server/client "key exchange" code path
	o enumerate return values in main()
	o fixed nit: pp_*_exch_dest was called twice.
	  Each time it would malloc a new "rem_dest".
	  Code in pp_open_port() now free()'s the first one.

Signed-off-by: Grant Grundler <iod00d at hp.com>

thanks,
grant



Index: rdma_lat.c
===================================================================
--- rdma_lat.c	(revision 2519)
+++ rdma_lat.c	(working copy)
@@ -103,30 +103,40 @@ static uint16_t pp_get_local_lid(struct 
 	return attr.lid;
 }
 
-static int pp_client_connect(const char *servername, int port)
+
+static int pp_connect_sock(const char *servername, int port)
 {
 	struct addrinfo *res, *t;
 	struct addrinfo hints = {
+		.ai_flags    = AI_PASSIVE,	/* Server only? */
 		.ai_family   = AF_UNSPEC,
 		.ai_socktype = SOCK_STREAM
 	};
 	char *service;
-	int n;
 	int sockfd = -1;
+	int n;
 
 	asprintf(&service, "%d", port);
 	n = getaddrinfo(servername, service, &hints, &res);
 
 	if (n < 0) {
-		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
+		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n),
+				 servername ? servername : "NULL", port);
 		return n;
 	}
 
 	for (t = res; t; t = t->ai_next) {
 		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
 		if (sockfd >= 0) {
-			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
-				break;
+			if (servername) {
+				if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
+					break;
+			} else {
+				setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
+				if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
+					break;
+			}
+
 			close(sockfd);
 			sockfd = -1;
 		}
@@ -134,147 +144,100 @@ static int pp_client_connect(const char 
 
 	freeaddrinfo(res);
 
-	if (sockfd < 0) {
-		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
-		return sockfd;
+	if (sockfd < 0)
+		fprintf(stderr, "Couldn't %s to %s:%d\n",
+				 servername ? "connect" : "bind",
+				 servername ? servername : "NULL", port);
+
+	/* "server" side needs to wait/listen for client to call */
+	if (!servername) {
+		int connfd;
+
+		listen(sockfd, 1);
+		connfd = accept(sockfd, NULL, 0);
+		if (connfd < 0) {
+			perror("server accept");
+			fprintf(stderr, "accept() failed\n");
+		}
+		close(sockfd);
+		return connfd;
 	}
+
 	return sockfd;
 }
 
-struct pingpong_dest * pp_client_exch_dest(int sockfd,
-					   const struct pingpong_dest *my_dest)
+#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")
+#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"
+
+static int pp_write_keys(int sockfd, const struct pingpong_dest *my_dest)
 {
-	struct pingpong_dest *rem_dest = NULL;
-	char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
-	int parsed;
+	char msg[KEY_MSG_SIZE];
+
+	sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,
+			my_dest->psn, my_dest->rkey, my_dest->vaddr);
 
-	sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
-			my_dest->psn,my_dest->rkey,my_dest->vaddr);
 	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
 		perror("client write");
 		fprintf(stderr, "Couldn't send local address\n");
-		goto out;
+		return 0;
 	}
 
+	return 1;
+}
+
+static struct pingpong_dest * pp_read_keys(int sockfd, const struct pingpong_dest *my_dest)
+{
+	struct pingpong_dest *rem_dest = NULL;
+	int parsed;
+	char msg[KEY_MSG_SIZE];
+
 	if (read(sockfd, msg, sizeof msg) != sizeof msg) {
-		perror("client read");
+		perror("pp_read_keys");
 		fprintf(stderr, "Couldn't read remote address\n");
-		goto out;
+		return NULL;
 	}
 
 	rem_dest = malloc(sizeof *rem_dest);
 	if (!rem_dest)
-		goto out;
+		return NULL;
 
-	parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
-			&rem_dest->psn,&rem_dest->rkey,&rem_dest->vaddr);
+	parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,
+			&rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
 
 	if (parsed != 5) {
-		fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
-				msg);
+		fprintf(stderr, "Couldn't parse line <%.*s>\n",
+				(int)sizeof msg, msg);
 		free(rem_dest);
-		rem_dest = NULL;
-		goto out;
-	}
-out:
-	return rem_dest;
-}
-
-int pp_server_connect(int port)
-{
-	struct addrinfo *res, *t;
-	struct addrinfo hints = {
-		.ai_flags    = AI_PASSIVE,
-		.ai_family   = AF_UNSPEC,
-		.ai_socktype = SOCK_STREAM
-	};
-	char *service;
-	int sockfd = -1, connfd;
-	int n;
-
-	asprintf(&service, "%d", port);
-	n = getaddrinfo(NULL, service, &hints, &res);
-
-	if (n < 0) {
-		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
-		return n;
-	}
-
-	for (t = res; t; t = t->ai_next) {
-		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
-		if (sockfd >= 0) {
-			n = 1;
-
-			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
-
-			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
-				break;
-			close(sockfd);
-			sockfd = -1;
-		}
-	}
-
-	freeaddrinfo(res);
-
-	if (sockfd < 0) {
-		fprintf(stderr, "Couldn't listen to port %d\n", port);
-		return sockfd;
-	}
-
-	listen(sockfd, 1);
-	connfd = accept(sockfd, NULL, 0);
-	if (connfd < 0) {
-		perror("server accept");
-		fprintf(stderr, "accept() failed\n");
-		close(sockfd);
-		return connfd;
+		return NULL;
 	}
 
-	close(sockfd);
-	return connfd;
+	return rem_dest;
 }
 
-static struct pingpong_dest *pp_server_exch_dest(int connfd, const struct pingpong_dest *my_dest)
+static struct pingpong_dest * pp_exch_dest(int sockfd, const char *servername,
+					const struct pingpong_dest *my_dest)
 {
-	char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
 	struct pingpong_dest *rem_dest = NULL;
-	int parsed;
-	int n;
-
-	n = read(connfd, msg, sizeof msg);
-	if (n != sizeof msg) {
-		perror("server read");
-		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
-		goto out;
-	}
 
-	rem_dest = malloc(sizeof *rem_dest);
-	if (!rem_dest)
-		goto out;
-
-	parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
-			&rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
-	if (parsed != 5) {
-		fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
-				msg);
-		free(rem_dest);
-		rem_dest = NULL;
-		goto out;
+	if (servername) {
+		if (!pp_write_keys(sockfd, my_dest))
+			goto exch_failed;
+		rem_dest = pp_read_keys(sockfd, my_dest);
+		if (!rem_dest)
+			goto exch_failed;
+	} else {
+		rem_dest = pp_read_keys(sockfd, my_dest);
+		if (!rem_dest)
+			goto exch_failed;
+		if (!pp_write_keys(sockfd, my_dest))
+			goto exch_failed;
 	}
 
-	sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
-			my_dest->psn, my_dest->rkey, my_dest->vaddr);
-	if (write(connfd, msg, sizeof msg) != sizeof msg) {
-		perror("server write");
-		fprintf(stderr, "Couldn't send local address\n");
-		free(rem_dest);
-		rem_dest = NULL;
-		goto out;
-	}
-out:
+exch_failed:
 	return rem_dest;
 }
 
+
 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 					    int tx_depth, int port)
 {
@@ -424,6 +387,86 @@ static int pp_connect_ctx(struct pingpon
 	return 0;
 }
 
+static struct ibv_device * pp_find_dev( const char *ib_devname)
+{
+	struct dlist	*dev_list;
+	struct ibv_device *ib_dev = NULL;
+
+	dev_list = ibv_get_devices();
+
+	dlist_start(dev_list);
+	if (!ib_devname) {
+		ib_dev = dlist_next(dev_list);
+		if (!ib_dev)
+			fprintf(stderr, "No IB devices found\n");
+	} else {
+		dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
+			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+				break;
+		if (!ib_dev)
+			fprintf(stderr, "IB device %s not found\n", ib_devname);
+	}
+	return ib_dev;
+}
+
+
+static struct pingpong_dest * pp_open_port(struct pingpong_context *ctx,
+	const char * servername, int ib_port, int port)
+{
+	char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";
+	struct pingpong_dest	my_dest;
+	struct pingpong_dest	*rem_dest;
+	int			sockfd;
+
+
+	/* Create connection between client and server.
+	 * We do it by exchanging data over a TCP socket connection. */
+
+	my_dest.lid = pp_get_local_lid(ctx, ib_port);
+	my_dest.qpn = ctx->qp->qp_num;
+	my_dest.psn = lrand48() & 0xffffff;
+	if (!my_dest.lid) {
+		fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
+		return NULL;
+	}
+	my_dest.rkey = ctx->mr->rkey;
+	my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
+
+	printf(addr_fmt, "local", my_dest.lid, my_dest.qpn, my_dest.psn,
+			my_dest.rkey, my_dest.vaddr);
+	
+
+	sockfd = pp_connect_sock(servername, port);
+	if (sockfd < 0) {
+		printf("pp_connect_sock(%s,%d) failed (%d)!\n",
+					servername, port, sockfd);
+		return NULL;
+	}
+
+	rem_dest = pp_exch_dest(sockfd, servername, &my_dest);
+	if (!rem_dest)
+		return NULL;
+
+	printf(addr_fmt, "remote", rem_dest->lid, rem_dest->qpn, rem_dest->psn,
+			rem_dest->rkey, rem_dest->vaddr);
+
+	if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
+		return NULL;
+
+	/* An additional handshake is required *after* moving qp to RTR.
+         * Arbitrarily reuse exch_dest for this purpose.
+	 */
+
+	free(rem_dest);
+	rem_dest = pp_exch_dest(sockfd, servername, &my_dest);
+
+	write(sockfd, "done", sizeof "done");
+	close(sockfd);
+
+	return rem_dest;
+}
+
+
 static void usage(const char *argv0)
 {
 	printf("Usage:\n");
@@ -518,27 +561,26 @@ static void print_report(struct report_o
 
 int main(int argc, char *argv[])
 {
-	struct dlist		*dev_list;
 	struct ibv_device	*ib_dev;
 	struct pingpong_context *ctx;
-	struct pingpong_dest     my_dest;
 	struct pingpong_dest    *rem_dest;
 	char                    *ib_devname = NULL;
 	char                    *servername = NULL;
-	int                      port = 18515;
-	int                      ib_port = 1;
-	int                      size = 1;
-	int                      tx_depth = 50;
-	int                      iters = 1000;
-	int                      scnt, rcnt, ccnt;
-	int			 sockfd;
+
 	struct ibv_qp		*qp;
 	struct ibv_send_wr	*wr;
 	volatile char		*poll_buf;
 	volatile char		*post_buf;
-	struct report_options    report = {};
 
-	cycles_t	*tstamp;
+	int			port = 18515;
+	int			ib_port = 1;
+	int			size = 1;
+	int			iters = 1000;
+        int			tx_depth = 50;
+	int			scnt, rcnt, ccnt;
+
+	static struct report_options    report = {};
+	static cycles_t	*tstamp;
 
 	/* Parameter parsing. */
 	while (1) {
@@ -578,25 +620,25 @@ int main(int argc, char *argv[])
 			ib_port = strtol(optarg, NULL, 0);
 			if (ib_port < 0) {
 				usage(argv[0]);
-				return 1;
+				return 2;
 			}
 			break;
 
 		case 's':
 			size = strtol(optarg, NULL, 0);
-			if (size < 1) { usage(argv[0]); return 1; }
+			if (size < 1) { usage(argv[0]); return 3; }
 			break;
 
 		case 't':
 			tx_depth = strtol(optarg, NULL, 0);
-			if (tx_depth < 1) { usage(argv[0]); return 1; }
+			if (tx_depth < 1) { usage(argv[0]); return 4; }
 			break;
 
 		case 'n':
 			iters = strtol(optarg, NULL, 0);
 			if (iters < 2) {
 				usage(argv[0]);
-				return 1;
+				return 5;
 			}
 
 			break;
@@ -615,7 +657,7 @@ int main(int argc, char *argv[])
 
 		default:
 			usage(argv[0]);
-			return 1;
+			return 5;
 		}
 	}
 
@@ -623,90 +665,26 @@ int main(int argc, char *argv[])
 		servername = strdupa(argv[optind]);
 	else if (optind < argc) {
 		usage(argv[0]);
-		return 1;
+		return 6;
 	}
 
 
-	/* Done with parameter parsing. Perform setup. */
+	/*
+	 *  Done with parameter parsing. Perform setup.
+	 */
 
 	srand48(getpid() * time(NULL));
-
 	page_size = sysconf(_SC_PAGESIZE);
 
-	dev_list = ibv_get_devices();
-
-	dlist_start(dev_list);
-	if (!ib_devname) {
-		ib_dev = dlist_next(dev_list);
-		if (!ib_dev) {
-			fprintf(stderr, "No IB devices found\n");
-			return 1;
-		}
-	} else {
-		dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
-			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
-				break;
-		if (!ib_dev) {
-			fprintf(stderr, "IB device %s not found\n", ib_devname);
-			return 1;
-		}
-	}
+	ib_dev = pp_find_dev(ib_devname);
 
 	ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
 	if (!ctx)
-		return 1;
-
-	/* Create connection between client and server.
-	 * We do it by exchanging data over a TCP socket connection. */
-
-	my_dest.lid = pp_get_local_lid(ctx, ib_port);
-	my_dest.qpn = ctx->qp->qp_num;
-	my_dest.psn = lrand48() & 0xffffff;
-	if (!my_dest.lid) {
-		fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
-		return 1;
-	}
-	my_dest.rkey = ctx->mr->rkey;
-	my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
-
-	printf("  local address:  LID %#04x, QPN %#06x, PSN %#06x "
-			"RKey %#08x VAddr %#016Lx\n",
-			my_dest.lid, my_dest.qpn, my_dest.psn,
-			my_dest.rkey, my_dest.vaddr);
-
-	if (servername) {
-		sockfd = pp_client_connect(servername, port);
-		if (sockfd < 0)
-			return 1;
-		rem_dest = pp_client_exch_dest(sockfd, &my_dest);
-	} else {
-		sockfd = pp_server_connect(port);
-		if (sockfd < 0)
-			return 1;
-		rem_dest = pp_server_exch_dest(sockfd, &my_dest);
-	}
+		return 7;
 
+	rem_dest = pp_open_port(ctx, servername, ib_port, port);
 	if (!rem_dest)
-		return 1;
-
-	printf("  remote address: LID %#04x, QPN %#06x, PSN %#06x, "
-			"RKey %#08x VAddr %#016Lx\n",
-			rem_dest->lid, rem_dest->qpn, rem_dest->psn,
-			rem_dest->rkey, rem_dest->vaddr);
-
-	if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
-		return 1;
-
-	/* An additional handshake is required *after* moving qp to RTR.
-           Arbitrarily reuse exch_dest for this purpose. */
-	if (servername) {
-		rem_dest = pp_client_exch_dest(sockfd, &my_dest);
-	} else {
-		rem_dest = pp_server_exch_dest(sockfd, &my_dest);
-	}
-
-	write(sockfd, "done", sizeof "done");
-	close(sockfd);
+		return 8;
 
 	wr = &ctx->wr;
 	ctx->list.addr = (uintptr_t) ctx->buf;
@@ -726,7 +704,7 @@ int main(int argc, char *argv[])
 
 	if (!tstamp) {
 		perror("malloc");
-		return 1;
+		return 9;
 	}
 
 	/* Done with setup. Start the test. */
@@ -736,8 +714,8 @@ int main(int argc, char *argv[])
 		/* Wait till buffer changes. */
 		if (rcnt < iters && !(scnt < 1 && servername)) {
 			++rcnt;
-			while (*poll_buf != (char)rcnt) {
-			}
+			while (*poll_buf != (char)rcnt)
+				;
 			/* Here the data is already in the physical memory.
 			   If we wanted to actually use it, we may need
 			   a read memory barrier here. */
@@ -751,7 +729,8 @@ int main(int argc, char *argv[])
 			if (ibv_post_send(qp, wr, &bad_wr)) {
 				fprintf(stderr, "Couldn't post send: scnt=%d\n",
 					scnt);
-				return 1;
+				free(tstamp);
+				return 10;
 			}
 		}
 
@@ -765,7 +744,8 @@ int main(int argc, char *argv[])
 
 			if (ne < 0) {
 				fprintf(stderr, "poll CQ failed %d\n", ne);
-				return 1;
+				free(tstamp);
+				return 11;
 			}
 			if (wc.status != IBV_WC_SUCCESS) {
 				fprintf(stderr, "Completion wth error at %s:\n",
@@ -774,7 +754,8 @@ int main(int argc, char *argv[])
 					wc.status, (int) wc.wr_id);
 				fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",
 					scnt, rcnt, ccnt);
-				return 1;
+				free(tstamp);
+				return 12;
 			}
 		}
 	}



More information about the general mailing list