[openib-general] [PATCH] rdma_lat-09 and results
Grant Grundler
iod00d at hp.com
Wed Jun 1 17:52:28 PDT 2005
Michael,
Good news:
My next cleanup of rdma_lat.c is working and patch is appended.
Summary of changes below.
Bad News:
perf is about 15 cycles slower since the last time I tested.
(Hrm...maybe it's time to cycle power on the TS90 switch again.)
Here's with the new rdma_lat.c:
grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat -C
local address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
remote address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
Latency typical: 7140 cycles
Latency best : 6915 cycles
Latency worst : 52915.5 cycles
grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$
And the "client" side:
grundler at iota:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat -C 10.0.0.51
local address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
remote address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
Latency typical: 7140 cycles
Latency best : 6907 cycles
Latency worst : 94920 cycles
The previous set of rdma_lat results are here:
http://openib.org/pipermail/openib-general/2005-May/006721.html
I'll guess the previous SVN version was no older than r2229.
I get 7140 to 7151 for the original rdma_lat. Usually 7147.5.
I get 7132 to 7155 with my version of rdma_lat. Usually 7140.
No statistically significant differences.
Both essentially agree on the higher result.
Using "-n 10000" gave more consistent results *
I use "taskset" to bind the rdma_lat test to a CPU.
But it didn't matter which CPU I bound the task to - results
were basically the same. I suspect the "stream" mode just
does not depend on or generate that many interrupts.
diffstat rdma_lat.c-09-diff
rdma_lat.c | 395 +++++++++++++++++++++++++++++--------------------------------
1 files changed, 188 insertions(+), 207 deletions(-)
Commit Log entry/Summary of changes:
o move device lookup from main() to pp_find_dev()
o move sockfd handling code to pp_open_port()
o consolidate server/client "key exchange" code path
o enumerate return values in main()
o fixed nit: pp_*_exch_dest was called twice.
Each time it would malloc a new "rem_dest".
Code in pp_open_port() now free()'s the first one.
Signed-off-by: Grant Grundler <iod00d at hp.com>
thanks,
grant
Index: rdma_lat.c
===================================================================
--- rdma_lat.c (revision 2519)
+++ rdma_lat.c (working copy)
@@ -103,30 +103,40 @@ static uint16_t pp_get_local_lid(struct
return attr.lid;
}
-static int pp_client_connect(const char *servername, int port)
+
+static int pp_connect_sock(const char *servername, int port)
{
struct addrinfo *res, *t;
struct addrinfo hints = {
+ .ai_flags = AI_PASSIVE, /* Server only? */
.ai_family = AF_UNSPEC,
.ai_socktype = SOCK_STREAM
};
char *service;
- int n;
int sockfd = -1;
+ int n;
asprintf(&service, "%d", port);
n = getaddrinfo(servername, service, &hints, &res);
if (n < 0) {
- fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
+ fprintf(stderr, "%s for %s:%d\n", gai_strerror(n),
+ servername ? servername : "NULL", port);
return n;
}
for (t = res; t; t = t->ai_next) {
sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
if (sockfd >= 0) {
- if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
- break;
+ if (servername) {
+ if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
+ break;
+ } else {
+ setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
+ if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
+ break;
+ }
+
close(sockfd);
sockfd = -1;
}
@@ -134,147 +144,100 @@ static int pp_client_connect(const char
freeaddrinfo(res);
- if (sockfd < 0) {
- fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
- return sockfd;
+ if (sockfd < 0)
+ fprintf(stderr, "Couldn't %s to %s:%d\n",
+ servername ? "connect" : "bind",
+ servername ? servername : "NULL", port);
+
+ /* "server" side needs to wait/listen for client to call */
+ if (!servername) {
+ int connfd;
+
+ listen(sockfd, 1);
+ connfd = accept(sockfd, NULL, 0);
+ if (connfd < 0) {
+ perror("server accept");
+ fprintf(stderr, "accept() failed\n");
+ }
+ close(sockfd);
+ return connfd;
}
+
return sockfd;
}
-struct pingpong_dest * pp_client_exch_dest(int sockfd,
- const struct pingpong_dest *my_dest)
+#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")
+#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"
+
+static int pp_write_keys(int sockfd, const struct pingpong_dest *my_dest)
{
- struct pingpong_dest *rem_dest = NULL;
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
- int parsed;
+ char msg[KEY_MSG_SIZE];
+
+ sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,
+ my_dest->psn, my_dest->rkey, my_dest->vaddr);
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
- my_dest->psn,my_dest->rkey,my_dest->vaddr);
if (write(sockfd, msg, sizeof msg) != sizeof msg) {
perror("client write");
fprintf(stderr, "Couldn't send local address\n");
- goto out;
+ return 0;
}
+ return 1;
+}
+
+static struct pingpong_dest * pp_read_keys(int sockfd, const struct pingpong_dest *my_dest)
+{
+ struct pingpong_dest *rem_dest = NULL;
+ int parsed;
+ char msg[KEY_MSG_SIZE];
+
if (read(sockfd, msg, sizeof msg) != sizeof msg) {
- perror("client read");
+ perror("pp_read_keys");
fprintf(stderr, "Couldn't read remote address\n");
- goto out;
+ return NULL;
}
rem_dest = malloc(sizeof *rem_dest);
if (!rem_dest)
- goto out;
+ return NULL;
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
- &rem_dest->psn,&rem_dest->rkey,&rem_dest->vaddr);
+ parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,
+ &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
if (parsed != 5) {
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
- msg);
+ fprintf(stderr, "Couldn't parse line <%.*s>\n",
+ (int)sizeof msg, msg);
free(rem_dest);
- rem_dest = NULL;
- goto out;
- }
-out:
- return rem_dest;
-}
-
-int pp_server_connect(int port)
-{
- struct addrinfo *res, *t;
- struct addrinfo hints = {
- .ai_flags = AI_PASSIVE,
- .ai_family = AF_UNSPEC,
- .ai_socktype = SOCK_STREAM
- };
- char *service;
- int sockfd = -1, connfd;
- int n;
-
- asprintf(&service, "%d", port);
- n = getaddrinfo(NULL, service, &hints, &res);
-
- if (n < 0) {
- fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
- return n;
- }
-
- for (t = res; t; t = t->ai_next) {
- sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
- if (sockfd >= 0) {
- n = 1;
-
- setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
-
- if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
- break;
- close(sockfd);
- sockfd = -1;
- }
- }
-
- freeaddrinfo(res);
-
- if (sockfd < 0) {
- fprintf(stderr, "Couldn't listen to port %d\n", port);
- return sockfd;
- }
-
- listen(sockfd, 1);
- connfd = accept(sockfd, NULL, 0);
- if (connfd < 0) {
- perror("server accept");
- fprintf(stderr, "accept() failed\n");
- close(sockfd);
- return connfd;
+ return NULL;
}
- close(sockfd);
- return connfd;
+ return rem_dest;
}
-static struct pingpong_dest *pp_server_exch_dest(int connfd, const struct pingpong_dest *my_dest)
+static struct pingpong_dest * pp_exch_dest(int sockfd, const char *servername,
+ const struct pingpong_dest *my_dest)
{
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
struct pingpong_dest *rem_dest = NULL;
- int parsed;
- int n;
-
- n = read(connfd, msg, sizeof msg);
- if (n != sizeof msg) {
- perror("server read");
- fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
- goto out;
- }
- rem_dest = malloc(sizeof *rem_dest);
- if (!rem_dest)
- goto out;
-
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
- &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
- if (parsed != 5) {
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
- msg);
- free(rem_dest);
- rem_dest = NULL;
- goto out;
+ if (servername) {
+ if (!pp_write_keys(sockfd, my_dest))
+ goto exch_failed;
+ rem_dest = pp_read_keys(sockfd, my_dest);
+ if (!rem_dest)
+ goto exch_failed;
+ } else {
+ rem_dest = pp_read_keys(sockfd, my_dest);
+ if (!rem_dest)
+ goto exch_failed;
+ if (!pp_write_keys(sockfd, my_dest))
+ goto exch_failed;
}
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
- my_dest->psn, my_dest->rkey, my_dest->vaddr);
- if (write(connfd, msg, sizeof msg) != sizeof msg) {
- perror("server write");
- fprintf(stderr, "Couldn't send local address\n");
- free(rem_dest);
- rem_dest = NULL;
- goto out;
- }
-out:
+exch_failed:
return rem_dest;
}
+
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
int tx_depth, int port)
{
@@ -424,6 +387,86 @@ static int pp_connect_ctx(struct pingpon
return 0;
}
+static struct ibv_device * pp_find_dev( const char *ib_devname)
+{
+ struct dlist *dev_list;
+ struct ibv_device *ib_dev = NULL;
+
+ dev_list = ibv_get_devices();
+
+ dlist_start(dev_list);
+ if (!ib_devname) {
+ ib_dev = dlist_next(dev_list);
+ if (!ib_dev)
+ fprintf(stderr, "No IB devices found\n");
+ } else {
+ dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
+ if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+ break;
+ if (!ib_dev)
+ fprintf(stderr, "IB device %s not found\n", ib_devname);
+ }
+ return ib_dev;
+}
+
+
+static struct pingpong_dest * pp_open_port(struct pingpong_context *ctx,
+ const char * servername, int ib_port, int port)
+{
+ char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";
+ struct pingpong_dest my_dest;
+ struct pingpong_dest *rem_dest;
+ int sockfd;
+
+
+ /* Create connection between client and server.
+ * We do it by exchanging data over a TCP socket connection. */
+
+ my_dest.lid = pp_get_local_lid(ctx, ib_port);
+ my_dest.qpn = ctx->qp->qp_num;
+ my_dest.psn = lrand48() & 0xffffff;
+ if (!my_dest.lid) {
+ fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
+ return NULL;
+ }
+ my_dest.rkey = ctx->mr->rkey;
+ my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
+
+ printf(addr_fmt, "local", my_dest.lid, my_dest.qpn, my_dest.psn,
+ my_dest.rkey, my_dest.vaddr);
+
+
+ sockfd = pp_connect_sock(servername, port);
+ if (sockfd < 0) {
+ printf("pp_connect_sock(%s,%d) failed (%d)!\n",
+ servername, port, sockfd);
+ return NULL;
+ }
+
+ rem_dest = pp_exch_dest(sockfd, servername, &my_dest);
+ if (!rem_dest)
+ return NULL;
+
+ printf(addr_fmt, "remote", rem_dest->lid, rem_dest->qpn, rem_dest->psn,
+ rem_dest->rkey, rem_dest->vaddr);
+
+ if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
+ return NULL;
+
+ /* An additional handshake is required *after* moving qp to RTR.
+ * Arbitrarily reuse exch_dest for this purpose.
+ */
+
+ free(rem_dest);
+ rem_dest = pp_exch_dest(sockfd, servername, &my_dest);
+
+ write(sockfd, "done", sizeof "done");
+ close(sockfd);
+
+ return rem_dest;
+}
+
+
static void usage(const char *argv0)
{
printf("Usage:\n");
@@ -518,27 +561,26 @@ static void print_report(struct report_o
int main(int argc, char *argv[])
{
- struct dlist *dev_list;
struct ibv_device *ib_dev;
struct pingpong_context *ctx;
- struct pingpong_dest my_dest;
struct pingpong_dest *rem_dest;
char *ib_devname = NULL;
char *servername = NULL;
- int port = 18515;
- int ib_port = 1;
- int size = 1;
- int tx_depth = 50;
- int iters = 1000;
- int scnt, rcnt, ccnt;
- int sockfd;
+
struct ibv_qp *qp;
struct ibv_send_wr *wr;
volatile char *poll_buf;
volatile char *post_buf;
- struct report_options report = {};
- cycles_t *tstamp;
+ int port = 18515;
+ int ib_port = 1;
+ int size = 1;
+ int iters = 1000;
+ int tx_depth = 50;
+ int scnt, rcnt, ccnt;
+
+ static struct report_options report = {};
+ static cycles_t *tstamp;
/* Parameter parsing. */
while (1) {
@@ -578,25 +620,25 @@ int main(int argc, char *argv[])
ib_port = strtol(optarg, NULL, 0);
if (ib_port < 0) {
usage(argv[0]);
- return 1;
+ return 2;
}
break;
case 's':
size = strtol(optarg, NULL, 0);
- if (size < 1) { usage(argv[0]); return 1; }
+ if (size < 1) { usage(argv[0]); return 3; }
break;
case 't':
tx_depth = strtol(optarg, NULL, 0);
- if (tx_depth < 1) { usage(argv[0]); return 1; }
+ if (tx_depth < 1) { usage(argv[0]); return 4; }
break;
case 'n':
iters = strtol(optarg, NULL, 0);
if (iters < 2) {
usage(argv[0]);
- return 1;
+ return 5;
}
break;
@@ -615,7 +657,7 @@ int main(int argc, char *argv[])
default:
usage(argv[0]);
- return 1;
+ return 5;
}
}
@@ -623,90 +665,26 @@ int main(int argc, char *argv[])
servername = strdupa(argv[optind]);
else if (optind < argc) {
usage(argv[0]);
- return 1;
+ return 6;
}
- /* Done with parameter parsing. Perform setup. */
+ /*
+ * Done with parameter parsing. Perform setup.
+ */
srand48(getpid() * time(NULL));
-
page_size = sysconf(_SC_PAGESIZE);
- dev_list = ibv_get_devices();
-
- dlist_start(dev_list);
- if (!ib_devname) {
- ib_dev = dlist_next(dev_list);
- if (!ib_dev) {
- fprintf(stderr, "No IB devices found\n");
- return 1;
- }
- } else {
- dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
- if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
- break;
- if (!ib_dev) {
- fprintf(stderr, "IB device %s not found\n", ib_devname);
- return 1;
- }
- }
+ ib_dev = pp_find_dev(ib_devname);
ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
if (!ctx)
- return 1;
-
- /* Create connection between client and server.
- * We do it by exchanging data over a TCP socket connection. */
-
- my_dest.lid = pp_get_local_lid(ctx, ib_port);
- my_dest.qpn = ctx->qp->qp_num;
- my_dest.psn = lrand48() & 0xffffff;
- if (!my_dest.lid) {
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
- return 1;
- }
- my_dest.rkey = ctx->mr->rkey;
- my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
-
- printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "
- "RKey %#08x VAddr %#016Lx\n",
- my_dest.lid, my_dest.qpn, my_dest.psn,
- my_dest.rkey, my_dest.vaddr);
-
- if (servername) {
- sockfd = pp_client_connect(servername, port);
- if (sockfd < 0)
- return 1;
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);
- } else {
- sockfd = pp_server_connect(port);
- if (sockfd < 0)
- return 1;
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);
- }
+ return 7;
+ rem_dest = pp_open_port(ctx, servername, ib_port, port);
if (!rem_dest)
- return 1;
-
- printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "
- "RKey %#08x VAddr %#016Lx\n",
- rem_dest->lid, rem_dest->qpn, rem_dest->psn,
- rem_dest->rkey, rem_dest->vaddr);
-
- if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
- return 1;
-
- /* An additional handshake is required *after* moving qp to RTR.
- Arbitrarily reuse exch_dest for this purpose. */
- if (servername) {
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);
- } else {
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);
- }
-
- write(sockfd, "done", sizeof "done");
- close(sockfd);
+ return 8;
wr = &ctx->wr;
ctx->list.addr = (uintptr_t) ctx->buf;
@@ -726,7 +704,7 @@ int main(int argc, char *argv[])
if (!tstamp) {
perror("malloc");
- return 1;
+ return 9;
}
/* Done with setup. Start the test. */
@@ -736,8 +714,8 @@ int main(int argc, char *argv[])
/* Wait till buffer changes. */
if (rcnt < iters && !(scnt < 1 && servername)) {
++rcnt;
- while (*poll_buf != (char)rcnt) {
- }
+ while (*poll_buf != (char)rcnt)
+ ;
/* Here the data is already in the physical memory.
If we wanted to actually use it, we may need
a read memory barrier here. */
@@ -751,7 +729,8 @@ int main(int argc, char *argv[])
if (ibv_post_send(qp, wr, &bad_wr)) {
fprintf(stderr, "Couldn't post send: scnt=%d\n",
scnt);
- return 1;
+ free(tstamp);
+ return 10;
}
}
@@ -765,7 +744,8 @@ int main(int argc, char *argv[])
if (ne < 0) {
fprintf(stderr, "poll CQ failed %d\n", ne);
- return 1;
+ free(tstamp);
+ return 11;
}
if (wc.status != IBV_WC_SUCCESS) {
fprintf(stderr, "Completion wth error at %s:\n",
@@ -774,7 +754,8 @@ int main(int argc, char *argv[])
wc.status, (int) wc.wr_id);
fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",
scnt, rcnt, ccnt);
- return 1;
+ free(tstamp);
+ return 12;
}
}
}
More information about the general
mailing list