[openib-general] Re: [PATCH] rdma_lat-09 and results
Michael S. Tsirkin
mst at mellanox.co.il
Wed Jun 15 08:04:40 PDT 2005
Quoting r. Grant Grundler <iod00d at hp.com>:
> Subject: [PATCH] rdma_lat-09 and results
>
> Michael,
>
> Good news:
> My next cleanup of rdma_lat.c is working and patch is appended.
> Summary of changes below.
>
> Bad News:
> perf is about ~15 cycles slower since the last time I tested.
> (Hrm...maybe it's time to cycle power on the TS90 switch again.)
>
>
> Here's with the new rdma_lat.c:
> grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat -C
> local address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
> remote address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
> Latency typical: 7140 cycles
> Latency best : 6915 cycles
> Latency worst : 52915.5 cycles
> grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$
>
> And the "client" side:
> grundler at iota:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat -C 10.0.0.51
> local address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
> remote address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
> Latency typical: 7140 cycles
> Latency best : 6907 cycles
> Latency worst : 94920 cycles
>
>
> The previous set of rdma_lat results are here:
> http://openib.org/pipermail/openib-general/2005-May/006721.html
>
> I'll guess the previous SVN verion was no older than r2229.
>
>
> I get 7140 to 7151 for the original rdma_lat. Usually 7147.5.
> I get 7132 to 7155 with my version of rdma_lat. Usually 7140.
> No statistically significant differences.
> Both essentially agree on the higher result.
> Using "-n 10000" gave more consistent results *
>
> I use "taskset" to bind the rdma_lat test to a CPU.
> But it didn't matter which CPU I bound the task to - results
> where basically the same. I suspect the "stream" mode just
> does not depend on or generating that many interrupts.
>
>
> diffstat rdma_lat.c-09-diff
> rdma_lat.c | 395 +++++++++++++++++++++++++++++--------------------------------
> 1 files changed, 188 insertions(+), 207 deletions(-)
>
> Commit Log entry/Summary of changes:
> o move device lookup from main() to pp_find_dev()
> o move sockfd handling code to pp_open_port()
> o consolidate server/client "key exchange" code path
> o enumerate return values in main()
> o fixed nit: pp_*_exch_dest was called twice.
> Each time it would malloc a new "rem_dest".
> Code in pp_open_port() now free()'s the first one.
>
> Signed-off-by: Grant Grundler <iod00d at hp.com>
>
> thanks,
> grant
OK, I accepted most of it. Thanks!
There were some issues that I fixed, I also made some other small
cleanups.
One thing I skipped is the pp_connect_sock consolidation: I like to keep the
split between client and server at the top level. Connect routines have some
code duplication but I dont like spreading if (servername) all around them,
and I dont mind duplication much since this is standard socket stuff.
Grant, I'm not sure I understand the reason for
> o enumerate return values in main()
bit but I went along with it. Why do you like it?
The patch also added free(tstamp) in a couple of places, but since
we dont bother to free resources (qp, cq, ...) and since this was in
the test part which must be cristal clear, I skipped this bit.
If we ever do a proper cleanup, its probably a good idea to goto
outside the loop and handle errors there.
Here's what I ended up checking in:
Index: rdma_lat.c
===================================================================
--- rdma_lat.c (revision 2608)
+++ rdma_lat.c (working copy)
@@ -103,6 +103,71 @@ static uint16_t pp_get_local_lid(struct
return attr.lid;
}
+static struct ibv_device *pp_find_dev(const char *ib_devname)
+{
+ struct dlist *dev_list;
+ struct ibv_device *ib_dev = NULL;
+
+ dev_list = ibv_get_devices();
+
+ dlist_start(dev_list);
+ if (!ib_devname) {
+ ib_dev = dlist_next(dev_list);
+ if (!ib_dev)
+ fprintf(stderr, "No IB devices found\n");
+ } else {
+ dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
+ if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+ break;
+ if (!ib_dev)
+ fprintf(stderr, "IB device %s not found\n", ib_devname);
+ }
+ return ib_dev;
+}
+
+#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")
+#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"
+
+static int pp_write_keys(int sockfd, const struct pingpong_dest *my_dest)
+{
+ char msg[KEY_MSG_SIZE];
+
+ sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,
+ my_dest->psn, my_dest->rkey, my_dest->vaddr);
+
+ if (write(sockfd, msg, sizeof msg) != sizeof msg) {
+ perror("client write");
+ fprintf(stderr, "Couldn't send local address\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int pp_read_keys(int sockfd, const struct pingpong_dest *my_dest,
+ struct pingpong_dest *rem_dest)
+{
+ int parsed;
+ char msg[KEY_MSG_SIZE];
+
+ if (read(sockfd, msg, sizeof msg) != sizeof msg) {
+ perror("pp_read_keys");
+ fprintf(stderr, "Couldn't read remote address\n");
+ return -1;
+ }
+
+ parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,
+ &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
+
+ if (parsed != 5) {
+ fprintf(stderr, "Couldn't parse line <%.*s>\n",
+ (int)sizeof msg, msg);
+ return -1;
+ }
+
+ return 0;
+}
+
static int pp_client_connect(const char *servername, int port)
{
struct addrinfo *res, *t;
@@ -141,46 +206,16 @@ static int pp_client_connect(const char
return sockfd;
}
-struct pingpong_dest * pp_client_exch_dest(int sockfd,
- const struct pingpong_dest *my_dest)
+static int pp_client_exch_dest(int sockfd, const struct pingpong_dest *my_dest,
+ struct pingpong_dest *rem_dest)
{
- struct pingpong_dest *rem_dest = NULL;
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
- int parsed;
-
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
- my_dest->psn,my_dest->rkey,my_dest->vaddr);
- if (write(sockfd, msg, sizeof msg) != sizeof msg) {
- perror("client write");
- fprintf(stderr, "Couldn't send local address\n");
- goto out;
- }
-
- if (read(sockfd, msg, sizeof msg) != sizeof msg) {
- perror("client read");
- fprintf(stderr, "Couldn't read remote address\n");
- goto out;
- }
+ if (pp_write_keys(sockfd, my_dest))
+ return -1;
- rem_dest = malloc(sizeof *rem_dest);
- if (!rem_dest)
- goto out;
-
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
- &rem_dest->psn,&rem_dest->rkey,&rem_dest->vaddr);
-
- if (parsed != 5) {
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
- msg);
- free(rem_dest);
- rem_dest = NULL;
- goto out;
- }
-out:
- return rem_dest;
+ return pp_read_keys(sockfd, my_dest, rem_dest);
}
-int pp_server_connect(int port)
+static int pp_server_connect(int port)
{
struct addrinfo *res, *t;
struct addrinfo hints = {
@@ -234,45 +269,14 @@ int pp_server_connect(int port)
return connfd;
}
-static struct pingpong_dest *pp_server_exch_dest(int connfd, const struct pingpong_dest *my_dest)
+static int pp_server_exch_dest(int sockfd, const struct pingpong_dest *my_dest,
+ const struct pingpong_dest* rem_dest)
{
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
- struct pingpong_dest *rem_dest = NULL;
- int parsed;
- int n;
-
- n = read(connfd, msg, sizeof msg);
- if (n != sizeof msg) {
- perror("server read");
- fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
- goto out;
- }
- rem_dest = malloc(sizeof *rem_dest);
- if (!rem_dest)
- goto out;
-
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
- &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
- if (parsed != 5) {
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
- msg);
- free(rem_dest);
- rem_dest = NULL;
- goto out;
- }
+ if (pp_read_keys(sockfd, my_dest, rem_dest))
+ return -1;
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
- my_dest->psn, my_dest->rkey, my_dest->vaddr);
- if (write(connfd, msg, sizeof msg) != sizeof msg) {
- perror("server write");
- fprintf(stderr, "Couldn't send local address\n");
- free(rem_dest);
- rem_dest = NULL;
- goto out;
- }
-out:
- return rem_dest;
+ return pp_write_keys(sockfd, my_dest);
}
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
@@ -424,6 +428,65 @@ static int pp_connect_ctx(struct pingpon
return 0;
}
+static int pp_open_port(struct pingpong_context *ctx, const char * servername, int ib_port, int port,
+ struct pingpong_dest *rem_dest)
+{
+ char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";
+ struct pingpong_dest my_dest;
+ int sockfd;
+ int rc;
+
+
+ /* Create connection between client and server.
+ * We do it by exchanging data over a TCP socket connection. */
+
+ my_dest.lid = pp_get_local_lid(ctx, ib_port);
+ my_dest.qpn = ctx->qp->qp_num;
+ my_dest.psn = lrand48() & 0xffffff;
+ if (!my_dest.lid) {
+ fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
+ return -1;
+ }
+ my_dest.rkey = ctx->mr->rkey;
+ my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
+
+ printf(addr_fmt, "local", my_dest.lid, my_dest.qpn, my_dest.psn,
+ my_dest.rkey, my_dest.vaddr);
+
+ sockfd = servername ? pp_client_connect(servername, port) : pp_server_connect(port);
+
+ if (sockfd < 0) {
+ printf("pp_connect_sock(%s,%d) failed (%d)!\n",
+ servername, port, sockfd);
+ return sockfd;
+ }
+
+ rc = servername ? pp_client_exch_dest(sockfd, &my_dest) :
+ pp_server_exch_dest(sockfd, &my_dest);
+ if (rc)
+ return rc;
+
+ printf(addr_fmt, "remote", rem_dest->lid, rem_dest->qpn, rem_dest->psn,
+ rem_dest->rkey, rem_dest->vaddr);
+
+ if ((rc = pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest)))
+ return rc;
+
+ /* An additional handshake is required *after* moving qp to RTR.
+ * Arbitrarily reuse exch_dest for this purpose.
+ */
+
+ rc = servername ? pp_client_exch_dest(sockfd, &my_dest) :
+ pp_server_exch_dest(sockfd, &my_dest);
+
+ if (rc)
+ return rc;
+
+ write(sockfd, "done", sizeof "done");
+ close(sockfd);
+ return 0;
+}
+
static void usage(const char *argv0)
{
printf("Usage:\n");
@@ -515,30 +578,29 @@ static void print_report(struct report_o
free(delta);
}
-
int main(int argc, char *argv[])
{
- struct dlist *dev_list;
- struct ibv_device *ib_dev;
- struct pingpong_context *ctx;
- struct pingpong_dest my_dest;
- struct pingpong_dest *rem_dest;
- char *ib_devname = NULL;
- char *servername = NULL;
+ const char *ib_devname = NULL;
+ const char *servername = NULL;
int port = 18515;
int ib_port = 1;
int size = 1;
- int tx_depth = 50;
int iters = 1000;
- int scnt, rcnt, ccnt;
- int sockfd;
- struct ibv_qp *qp;
- struct ibv_send_wr *wr;
- volatile char *poll_buf;
- volatile char *post_buf;
+ int tx_depth = 50;
struct report_options report = {};
- cycles_t *tstamp;
+ struct pingpong_context *ctx;
+ struct pingpong_dest rem_dest;
+ struct ibv_device *ib_dev;
+
+ struct ibv_qp *qp;
+ struct ibv_send_wr *wr;
+ volatile char *poll_buf;
+ volatile char *post_buf;
+
+ int scnt, rcnt, ccnt;
+
+ cycles_t *tstamp;
/* Parameter parsing. */
while (1) {
@@ -578,25 +640,25 @@ int main(int argc, char *argv[])
ib_port = strtol(optarg, NULL, 0);
if (ib_port < 0) {
usage(argv[0]);
- return 1;
+ return 2;
}
break;
case 's':
size = strtol(optarg, NULL, 0);
- if (size < 1) { usage(argv[0]); return 1; }
+ if (size < 1) { usage(argv[0]); return 3; }
break;
case 't':
tx_depth = strtol(optarg, NULL, 0);
- if (tx_depth < 1) { usage(argv[0]); return 1; }
+ if (tx_depth < 1) { usage(argv[0]); return 4; }
break;
case 'n':
iters = strtol(optarg, NULL, 0);
if (iters < 2) {
usage(argv[0]);
- return 1;
+ return 5;
}
break;
@@ -615,7 +677,7 @@ int main(int argc, char *argv[])
default:
usage(argv[0]);
- return 1;
+ return 5;
}
}
@@ -623,90 +685,26 @@ int main(int argc, char *argv[])
servername = strdupa(argv[optind]);
else if (optind < argc) {
usage(argv[0]);
- return 1;
+ return 6;
}
-
- /* Done with parameter parsing. Perform setup. */
+ /*
+ * Done with parameter parsing. Perform setup.
+ */
srand48(getpid() * time(NULL));
-
page_size = sysconf(_SC_PAGESIZE);
- dev_list = ibv_get_devices();
-
- dlist_start(dev_list);
- if (!ib_devname) {
- ib_dev = dlist_next(dev_list);
- if (!ib_dev) {
- fprintf(stderr, "No IB devices found\n");
- return 1;
- }
- } else {
- dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
- if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
- break;
- if (!ib_dev) {
- fprintf(stderr, "IB device %s not found\n", ib_devname);
- return 1;
- }
- }
+ ib_dev = pp_find_dev(ib_devname);
+ if (!ib_dev)
+ return 7;
ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
if (!ctx)
- return 1;
-
- /* Create connection between client and server.
- * We do it by exchanging data over a TCP socket connection. */
-
- my_dest.lid = pp_get_local_lid(ctx, ib_port);
- my_dest.qpn = ctx->qp->qp_num;
- my_dest.psn = lrand48() & 0xffffff;
- if (!my_dest.lid) {
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
- return 1;
- }
- my_dest.rkey = ctx->mr->rkey;
- my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
-
- printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "
- "RKey %#08x VAddr %#016Lx\n",
- my_dest.lid, my_dest.qpn, my_dest.psn,
- my_dest.rkey, my_dest.vaddr);
-
- if (servername) {
- sockfd = pp_client_connect(servername, port);
- if (sockfd < 0)
- return 1;
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);
- } else {
- sockfd = pp_server_connect(port);
- if (sockfd < 0)
- return 1;
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);
- }
-
- if (!rem_dest)
- return 1;
-
- printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "
- "RKey %#08x VAddr %#016Lx\n",
- rem_dest->lid, rem_dest->qpn, rem_dest->psn,
- rem_dest->rkey, rem_dest->vaddr);
+ return 8;
- if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
- return 1;
-
- /* An additional handshake is required *after* moving qp to RTR.
- Arbitrarily reuse exch_dest for this purpose. */
- if (servername) {
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);
- } else {
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);
- }
-
- write(sockfd, "done", sizeof "done");
- close(sockfd);
+ if (pp_open_port(ctx, servername, ib_port, port, &rem_dest))
+ return 9;
wr = &ctx->wr;
ctx->list.addr = (uintptr_t) ctx->buf;
@@ -723,10 +721,9 @@ int main(int argc, char *argv[])
qp = ctx->qp;
tstamp = malloc(iters * sizeof *tstamp);
-
if (!tstamp) {
perror("malloc");
- return 1;
+ return 10;
}
/* Done with setup. Start the test. */
@@ -736,8 +733,8 @@ int main(int argc, char *argv[])
/* Wait till buffer changes. */
if (rcnt < iters && !(scnt < 1 && servername)) {
++rcnt;
- while (*poll_buf != (char)rcnt) {
- }
+ while (*poll_buf != (char)rcnt)
+ ;
/* Here the data is already in the physical memory.
If we wanted to actually use it, we may need
a read memory barrier here. */
@@ -751,7 +748,7 @@ int main(int argc, char *argv[])
if (ibv_post_send(qp, wr, &bad_wr)) {
fprintf(stderr, "Couldn't post send: scnt=%d\n",
scnt);
- return 1;
+ return 11;
}
}
@@ -765,7 +762,7 @@ int main(int argc, char *argv[])
if (ne < 0) {
fprintf(stderr, "poll CQ failed %d\n", ne);
- return 1;
+ return 12;
}
if (wc.status != IBV_WC_SUCCESS) {
fprintf(stderr, "Completion wth error at %s:\n",
@@ -774,13 +771,11 @@ int main(int argc, char *argv[])
wc.status, (int) wc.wr_id);
fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",
scnt, rcnt, ccnt);
- return 1;
+ return 13;
}
}
}
print_report(&report, iters, tstamp);
-
- free(tstamp);
return 0;
}
--
MST
More information about the general
mailing list