[openib-general] [PATCH] rdma_lat-09 and results
Grant Grundler
iod00d at hp.com
Wed Jun 8 16:45:32 PDT 2005
On Wed, Jun 01, 2005 at 05:52:28PM -0700, Grant Grundler wrote:
> Michael,
>
> Good news:
> My next cleanup of rdma_lat.c is working and patch is appended.
Michael,
Did you want to comment on this patch or apply it?
I can resend privately it you don't have the original mail.
> Summary of changes below.
>
> Bad News:
> perf is about ~15 cycles slower since the last time I tested.
> (Hrm...maybe it's time to cycle power on the TS90 switch again.)
Seems with discussion on perf was distracting from the patch.
I'll seperate those next time.
thanks,
grant
> Here's with the new rdma_lat.c:
> grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat -C
> local address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
> remote address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
> Latency typical: 7140 cycles
> Latency best : 6915 cycles
> Latency worst : 52915.5 cycles
> grundler at gsyprf3:/usr/src/openib_gen2/src/userspace/perftest$
>
> And the "client" side:
> grundler at iota:/usr/src/openib_gen2/src/userspace/perftest$ ./rdma_lat -C 10.0.0.51
> local address: LID 0x25 QPN 0x70406 PSN 0x5d4824 RKey 0x2a0434 VAddr 0x6000000000014001
> remote address: LID 0x27 QPN 0x80406 PSN 0x9188f7 RKey 0x300434 VAddr 0x6000000000014001
> Latency typical: 7140 cycles
> Latency best : 6907 cycles
> Latency worst : 94920 cycles
>
>
> The previous set of rdma_lat results are here:
> http://openib.org/pipermail/openib-general/2005-May/006721.html
>
> I'll guess the previous SVN verion was no older than r2229.
>
>
> I get 7140 to 7151 for the original rdma_lat. Usually 7147.5.
> I get 7132 to 7155 with my version of rdma_lat. Usually 7140.
> No statistically significant differences.
> Both essentially agree on the higher result.
> Using "-n 10000" gave more consistent results *
>
> I use "taskset" to bind the rdma_lat test to a CPU.
> But it didn't matter which CPU I bound the task to - results
> where basically the same. I suspect the "stream" mode just
> does not depend on or generating that many interrupts.
>
>
> diffstat rdma_lat.c-09-diff
> rdma_lat.c | 395 +++++++++++++++++++++++++++++--------------------------------
> 1 files changed, 188 insertions(+), 207 deletions(-)
>
> Commit Log entry/Summary of changes:
> o move device lookup from main() to pp_find_dev()
> o move sockfd handling code to pp_open_port()
> o consolidate server/client "key exchange" code path
> o enumerate return values in main()
> o fixed nit: pp_*_exch_dest was called twice.
> Each time it would malloc a new "rem_dest".
> Code in pp_open_port() now free()'s the first one.
>
> Signed-off-by: Grant Grundler <iod00d at hp.com>
>
> thanks,
> grant
>
>
>
> Index: rdma_lat.c
> ===================================================================
> --- rdma_lat.c (revision 2519)
> +++ rdma_lat.c (working copy)
> @@ -103,30 +103,40 @@ static uint16_t pp_get_local_lid(struct
> return attr.lid;
> }
>
> -static int pp_client_connect(const char *servername, int port)
> +
> +static int pp_connect_sock(const char *servername, int port)
> {
> struct addrinfo *res, *t;
> struct addrinfo hints = {
> + .ai_flags = AI_PASSIVE, /* Server only? */
> .ai_family = AF_UNSPEC,
> .ai_socktype = SOCK_STREAM
> };
> char *service;
> - int n;
> int sockfd = -1;
> + int n;
>
> asprintf(&service, "%d", port);
> n = getaddrinfo(servername, service, &hints, &res);
>
> if (n < 0) {
> - fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
> + fprintf(stderr, "%s for %s:%d\n", gai_strerror(n),
> + servername ? servername : "NULL", port);
> return n;
> }
>
> for (t = res; t; t = t->ai_next) {
> sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
> if (sockfd >= 0) {
> - if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
> - break;
> + if (servername) {
> + if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
> + break;
> + } else {
> + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
> + if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
> + break;
> + }
> +
> close(sockfd);
> sockfd = -1;
> }
> @@ -134,147 +144,100 @@ static int pp_client_connect(const char
>
> freeaddrinfo(res);
>
> - if (sockfd < 0) {
> - fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
> - return sockfd;
> + if (sockfd < 0)
> + fprintf(stderr, "Couldn't %s to %s:%d\n",
> + servername ? "connect" : "bind",
> + servername ? servername : "NULL", port);
> +
> + /* "server" side needs to wait/listen for client to call */
> + if (!servername) {
> + int connfd;
> +
> + listen(sockfd, 1);
> + connfd = accept(sockfd, NULL, 0);
> + if (connfd < 0) {
> + perror("server accept");
> + fprintf(stderr, "accept() failed\n");
> + }
> + close(sockfd);
> + return connfd;
> }
> +
> return sockfd;
> }
>
> -struct pingpong_dest * pp_client_exch_dest(int sockfd,
> - const struct pingpong_dest *my_dest)
> +#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")
> +#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"
> +
> +static int pp_write_keys(int sockfd, const struct pingpong_dest *my_dest)
> {
> - struct pingpong_dest *rem_dest = NULL;
> - char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
> - int parsed;
> + char msg[KEY_MSG_SIZE];
> +
> + sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,
> + my_dest->psn, my_dest->rkey, my_dest->vaddr);
>
> - sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
> - my_dest->psn,my_dest->rkey,my_dest->vaddr);
> if (write(sockfd, msg, sizeof msg) != sizeof msg) {
> perror("client write");
> fprintf(stderr, "Couldn't send local address\n");
> - goto out;
> + return 0;
> }
>
> + return 1;
> +}
> +
> +static struct pingpong_dest * pp_read_keys(int sockfd, const struct pingpong_dest *my_dest)
> +{
> + struct pingpong_dest *rem_dest = NULL;
> + int parsed;
> + char msg[KEY_MSG_SIZE];
> +
> if (read(sockfd, msg, sizeof msg) != sizeof msg) {
> - perror("client read");
> + perror("pp_read_keys");
> fprintf(stderr, "Couldn't read remote address\n");
> - goto out;
> + return NULL;
> }
>
> rem_dest = malloc(sizeof *rem_dest);
> if (!rem_dest)
> - goto out;
> + return NULL;
>
> - parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
> - &rem_dest->psn,&rem_dest->rkey,&rem_dest->vaddr);
> + parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,
> + &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
>
> if (parsed != 5) {
> - fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
> - msg);
> + fprintf(stderr, "Couldn't parse line <%.*s>\n",
> + (int)sizeof msg, msg);
> free(rem_dest);
> - rem_dest = NULL;
> - goto out;
> - }
> -out:
> - return rem_dest;
> -}
> -
> -int pp_server_connect(int port)
> -{
> - struct addrinfo *res, *t;
> - struct addrinfo hints = {
> - .ai_flags = AI_PASSIVE,
> - .ai_family = AF_UNSPEC,
> - .ai_socktype = SOCK_STREAM
> - };
> - char *service;
> - int sockfd = -1, connfd;
> - int n;
> -
> - asprintf(&service, "%d", port);
> - n = getaddrinfo(NULL, service, &hints, &res);
> -
> - if (n < 0) {
> - fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
> - return n;
> - }
> -
> - for (t = res; t; t = t->ai_next) {
> - sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
> - if (sockfd >= 0) {
> - n = 1;
> -
> - setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
> -
> - if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
> - break;
> - close(sockfd);
> - sockfd = -1;
> - }
> - }
> -
> - freeaddrinfo(res);
> -
> - if (sockfd < 0) {
> - fprintf(stderr, "Couldn't listen to port %d\n", port);
> - return sockfd;
> - }
> -
> - listen(sockfd, 1);
> - connfd = accept(sockfd, NULL, 0);
> - if (connfd < 0) {
> - perror("server accept");
> - fprintf(stderr, "accept() failed\n");
> - close(sockfd);
> - return connfd;
> + return NULL;
> }
>
> - close(sockfd);
> - return connfd;
> + return rem_dest;
> }
>
> -static struct pingpong_dest *pp_server_exch_dest(int connfd, const struct pingpong_dest *my_dest)
> +static struct pingpong_dest * pp_exch_dest(int sockfd, const char *servername,
> + const struct pingpong_dest *my_dest)
> {
> - char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];
> struct pingpong_dest *rem_dest = NULL;
> - int parsed;
> - int n;
> -
> - n = read(connfd, msg, sizeof msg);
> - if (n != sizeof msg) {
> - perror("server read");
> - fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
> - goto out;
> - }
>
> - rem_dest = malloc(sizeof *rem_dest);
> - if (!rem_dest)
> - goto out;
> -
> - parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,
> - &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
> - if (parsed != 5) {
> - fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,
> - msg);
> - free(rem_dest);
> - rem_dest = NULL;
> - goto out;
> + if (servername) {
> + if (!pp_write_keys(sockfd, my_dest))
> + goto exch_failed;
> + rem_dest = pp_read_keys(sockfd, my_dest);
> + if (!rem_dest)
> + goto exch_failed;
> + } else {
> + rem_dest = pp_read_keys(sockfd, my_dest);
> + if (!rem_dest)
> + goto exch_failed;
> + if (!pp_write_keys(sockfd, my_dest))
> + goto exch_failed;
> }
>
> - sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,
> - my_dest->psn, my_dest->rkey, my_dest->vaddr);
> - if (write(connfd, msg, sizeof msg) != sizeof msg) {
> - perror("server write");
> - fprintf(stderr, "Couldn't send local address\n");
> - free(rem_dest);
> - rem_dest = NULL;
> - goto out;
> - }
> -out:
> +exch_failed:
> return rem_dest;
> }
>
> +
> static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
> int tx_depth, int port)
> {
> @@ -424,6 +387,86 @@ static int pp_connect_ctx(struct pingpon
> return 0;
> }
>
> +static struct ibv_device * pp_find_dev( const char *ib_devname)
> +{
> + struct dlist *dev_list;
> + struct ibv_device *ib_dev = NULL;
> +
> + dev_list = ibv_get_devices();
> +
> + dlist_start(dev_list);
> + if (!ib_devname) {
> + ib_dev = dlist_next(dev_list);
> + if (!ib_dev)
> + fprintf(stderr, "No IB devices found\n");
> + } else {
> + dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
> + if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
> + break;
> + if (!ib_dev)
> + fprintf(stderr, "IB device %s not found\n", ib_devname);
> + }
> + return ib_dev;
> +}
> +
> +
> +static struct pingpong_dest * pp_open_port(struct pingpong_context *ctx,
> + const char * servername, int ib_port, int port)
> +{
> + char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";
> + struct pingpong_dest my_dest;
> + struct pingpong_dest *rem_dest;
> + int sockfd;
> +
> +
> + /* Create connection between client and server.
> + * We do it by exchanging data over a TCP socket connection. */
> +
> + my_dest.lid = pp_get_local_lid(ctx, ib_port);
> + my_dest.qpn = ctx->qp->qp_num;
> + my_dest.psn = lrand48() & 0xffffff;
> + if (!my_dest.lid) {
> + fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
> + return NULL;
> + }
> + my_dest.rkey = ctx->mr->rkey;
> + my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
> +
> + printf(addr_fmt, "local", my_dest.lid, my_dest.qpn, my_dest.psn,
> + my_dest.rkey, my_dest.vaddr);
> +
> +
> + sockfd = pp_connect_sock(servername, port);
> + if (sockfd < 0) {
> + printf("pp_connect_sock(%s,%d) failed (%d)!\n",
> + servername, port, sockfd);
> + return NULL;
> + }
> +
> + rem_dest = pp_exch_dest(sockfd, servername, &my_dest);
> + if (!rem_dest)
> + return NULL;
> +
> + printf(addr_fmt, "remote", rem_dest->lid, rem_dest->qpn, rem_dest->psn,
> + rem_dest->rkey, rem_dest->vaddr);
> +
> + if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
> + return NULL;
> +
> + /* An additional handshake is required *after* moving qp to RTR.
> + * Arbitrarily reuse exch_dest for this purpose.
> + */
> +
> + free(rem_dest);
> + rem_dest = pp_exch_dest(sockfd, servername, &my_dest);
> +
> + write(sockfd, "done", sizeof "done");
> + close(sockfd);
> +
> + return rem_dest;
> +}
> +
> +
> static void usage(const char *argv0)
> {
> printf("Usage:\n");
> @@ -518,27 +561,26 @@ static void print_report(struct report_o
>
> int main(int argc, char *argv[])
> {
> - struct dlist *dev_list;
> struct ibv_device *ib_dev;
> struct pingpong_context *ctx;
> - struct pingpong_dest my_dest;
> struct pingpong_dest *rem_dest;
> char *ib_devname = NULL;
> char *servername = NULL;
> - int port = 18515;
> - int ib_port = 1;
> - int size = 1;
> - int tx_depth = 50;
> - int iters = 1000;
> - int scnt, rcnt, ccnt;
> - int sockfd;
> +
> struct ibv_qp *qp;
> struct ibv_send_wr *wr;
> volatile char *poll_buf;
> volatile char *post_buf;
> - struct report_options report = {};
>
> - cycles_t *tstamp;
> + int port = 18515;
> + int ib_port = 1;
> + int size = 1;
> + int iters = 1000;
> + int tx_depth = 50;
> + int scnt, rcnt, ccnt;
> +
> + static struct report_options report = {};
> + static cycles_t *tstamp;
>
> /* Parameter parsing. */
> while (1) {
> @@ -578,25 +620,25 @@ int main(int argc, char *argv[])
> ib_port = strtol(optarg, NULL, 0);
> if (ib_port < 0) {
> usage(argv[0]);
> - return 1;
> + return 2;
> }
> break;
>
> case 's':
> size = strtol(optarg, NULL, 0);
> - if (size < 1) { usage(argv[0]); return 1; }
> + if (size < 1) { usage(argv[0]); return 3; }
> break;
>
> case 't':
> tx_depth = strtol(optarg, NULL, 0);
> - if (tx_depth < 1) { usage(argv[0]); return 1; }
> + if (tx_depth < 1) { usage(argv[0]); return 4; }
> break;
>
> case 'n':
> iters = strtol(optarg, NULL, 0);
> if (iters < 2) {
> usage(argv[0]);
> - return 1;
> + return 5;
> }
>
> break;
> @@ -615,7 +657,7 @@ int main(int argc, char *argv[])
>
> default:
> usage(argv[0]);
> - return 1;
> + return 5;
> }
> }
>
> @@ -623,90 +665,26 @@ int main(int argc, char *argv[])
> servername = strdupa(argv[optind]);
> else if (optind < argc) {
> usage(argv[0]);
> - return 1;
> + return 6;
> }
>
>
> - /* Done with parameter parsing. Perform setup. */
> + /*
> + * Done with parameter parsing. Perform setup.
> + */
>
> srand48(getpid() * time(NULL));
> -
> page_size = sysconf(_SC_PAGESIZE);
>
> - dev_list = ibv_get_devices();
> -
> - dlist_start(dev_list);
> - if (!ib_devname) {
> - ib_dev = dlist_next(dev_list);
> - if (!ib_dev) {
> - fprintf(stderr, "No IB devices found\n");
> - return 1;
> - }
> - } else {
> - dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
> - if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
> - break;
> - if (!ib_dev) {
> - fprintf(stderr, "IB device %s not found\n", ib_devname);
> - return 1;
> - }
> - }
> + ib_dev = pp_find_dev(ib_devname);
>
> ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
> if (!ctx)
> - return 1;
> -
> - /* Create connection between client and server.
> - * We do it by exchanging data over a TCP socket connection. */
> -
> - my_dest.lid = pp_get_local_lid(ctx, ib_port);
> - my_dest.qpn = ctx->qp->qp_num;
> - my_dest.psn = lrand48() & 0xffffff;
> - if (!my_dest.lid) {
> - fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
> - return 1;
> - }
> - my_dest.rkey = ctx->mr->rkey;
> - my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
> -
> - printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "
> - "RKey %#08x VAddr %#016Lx\n",
> - my_dest.lid, my_dest.qpn, my_dest.psn,
> - my_dest.rkey, my_dest.vaddr);
> -
> - if (servername) {
> - sockfd = pp_client_connect(servername, port);
> - if (sockfd < 0)
> - return 1;
> - rem_dest = pp_client_exch_dest(sockfd, &my_dest);
> - } else {
> - sockfd = pp_server_connect(port);
> - if (sockfd < 0)
> - return 1;
> - rem_dest = pp_server_exch_dest(sockfd, &my_dest);
> - }
> + return 7;
>
> + rem_dest = pp_open_port(ctx, servername, ib_port, port);
> if (!rem_dest)
> - return 1;
> -
> - printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "
> - "RKey %#08x VAddr %#016Lx\n",
> - rem_dest->lid, rem_dest->qpn, rem_dest->psn,
> - rem_dest->rkey, rem_dest->vaddr);
> -
> - if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
> - return 1;
> -
> - /* An additional handshake is required *after* moving qp to RTR.
> - Arbitrarily reuse exch_dest for this purpose. */
> - if (servername) {
> - rem_dest = pp_client_exch_dest(sockfd, &my_dest);
> - } else {
> - rem_dest = pp_server_exch_dest(sockfd, &my_dest);
> - }
> -
> - write(sockfd, "done", sizeof "done");
> - close(sockfd);
> + return 8;
>
> wr = &ctx->wr;
> ctx->list.addr = (uintptr_t) ctx->buf;
> @@ -726,7 +704,7 @@ int main(int argc, char *argv[])
>
> if (!tstamp) {
> perror("malloc");
> - return 1;
> + return 9;
> }
>
> /* Done with setup. Start the test. */
> @@ -736,8 +714,8 @@ int main(int argc, char *argv[])
> /* Wait till buffer changes. */
> if (rcnt < iters && !(scnt < 1 && servername)) {
> ++rcnt;
> - while (*poll_buf != (char)rcnt) {
> - }
> + while (*poll_buf != (char)rcnt)
> + ;
> /* Here the data is already in the physical memory.
> If we wanted to actually use it, we may need
> a read memory barrier here. */
> @@ -751,7 +729,8 @@ int main(int argc, char *argv[])
> if (ibv_post_send(qp, wr, &bad_wr)) {
> fprintf(stderr, "Couldn't post send: scnt=%d\n",
> scnt);
> - return 1;
> + free(tstamp);
> + return 10;
> }
> }
>
> @@ -765,7 +744,8 @@ int main(int argc, char *argv[])
>
> if (ne < 0) {
> fprintf(stderr, "poll CQ failed %d\n", ne);
> - return 1;
> + free(tstamp);
> + return 11;
> }
> if (wc.status != IBV_WC_SUCCESS) {
> fprintf(stderr, "Completion wth error at %s:\n",
> @@ -774,7 +754,8 @@ int main(int argc, char *argv[])
> wc.status, (int) wc.wr_id);
> fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",
> scnt, rcnt, ccnt);
> - return 1;
> + free(tstamp);
> + return 12;
> }
> }
> }
> _______________________________________________
> openib-general mailing list
> openib-general at openib.org
> http://openib.org/mailman/listinfo/openib-general
>
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
More information about the general
mailing list