[ofa-general] ***SPAM*** Re: [PATCH 8/8]: rds-tools: add new rds-ping utility
Olaf Kirch
okir at lst.de
Thu Apr 24 02:14:03 PDT 2008
From 01d43fd80fe8ca463ec01c073bf3d8c03c7daa26 Mon Sep 17 00:00:00 2001
From: Olaf Kirch <olaf.kirch at oracle.com>
Date: Thu, 24 Apr 2008 00:49:37 -0700
Subject: [PATCH] Add new rds-ping utility
This adds a new utility that acts a lot like the traditional ping
command, but uses RDS instead of ICMP. Its main purpose is to have
a simple tool to check the reachability of remote nodes.
The required kernel patch is posted separately.
Signed-off-by: Olaf Kirch <olaf.kirch at oracle.com>
---
Makefile.in | 3 +-
rds-ping.1 | 69 +++++++++++
rds-ping.c | 385 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 456 insertions(+), 1 deletions(-)
create mode 100644 rds-ping.1
create mode 100644 rds-ping.c
diff --git a/Makefile.in b/Makefile.in
index 7cad5f1..363bb58 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -24,7 +24,7 @@ else
COMMON_OBJECTS = $(subst .c,.o,$(filter-out pfhack.c,$(COMMON_SOURCES)))
endif
-PROGRAMS = rds-gen rds-sink rds-info rds-stress
+PROGRAMS = rds-gen rds-sink rds-info rds-stress rds-ping
all-programs: $(PROGRAMS)
@@ -65,6 +65,7 @@ EXTRA_DIST := rds-info.1 \
rds-gen.1 \
rds-sink.1 \
rds-stress.1 \
+ rds-ping.1 \
rds.7 \
rds-rdma.7 \
Makefile.in \
diff --git a/rds-ping.1 b/rds-ping.1
new file mode 100644
index 0000000..ae06787
--- /dev/null
+++ b/rds-ping.1
@@ -0,0 +1,69 @@
+.Dd Apr 22, 2008
+.Dt RDS-PING 1
+.Os
+.Sh NAME
+.Nm rds-ping
+.Nd test reachability of remote node over RDS
+.Pp
+.Sh SYNOPSIS
+.Nm rds-ping
+.Bk -words
+.Op Fl c Ar count
+.Op Fl i Ar interval
+.Op Fl I Ar local_addr
+.Ar remote_addr
+.Ek
+.Sh DESCRIPTION
+.Nm rds-ping
+is used to test whether a remote node is reachable over RDS.
+Its interface is designed to operate pretty much like the standard
+.Xr ping 8
+utility, even though the way it works is pretty different.
+.Pp
+.Nm rds-ping
+opens several RDS sockets and sends packets to port 0 on
+the indicated host. This is a special port number to which
+no socket is bound; instead, the kernel processes incoming
+packets and responds to them.
+.Sh OPTIONS
+The following options are available for use on the command line:
+.Bl -tag -width Ds
+.It Fl c Ar count
+Causes
+.Nm rds-ping
+to exit after sending (and receiving) the specified number of
+packets.
+.It Fl I Ar address
+By default,
+.Nm rds-ping
+will pick the local source address for the RDS socket based
+on routing information for the destination address (i.e. if
+packets to the given destination would be routed through interface
+.Nm ib0 ,
+then it will use the IP address of
+.Nm ib0
+as source address).
+Using the
+.Fl I
+option, you can override this choice.
+.It Fl i Ar timeout
+By default,
+.Nm rds-ping
+will wait for one second between sending packets. Use this option
+to specify a different interval. The timeout value is given in
+seconds, and can be a floating point number. Optionally, append
+.Nm msec
+or
+.Nm usec
+to specify a timeout in milliseconds or microseconds, respectively.
+.Pp
+Specifying a timeout considerably smaller than the packet round-trip
+time will produce unexpected results.
+.El
+.Sh AUTHORS
+.Nm rds-ping
+was written by Olaf Kirch <olaf.kirch at oracle.com>.
+.Sh SEE ALSO
+.Xr rds 7 ,
+.Xr rds-info 1 ,
+.Xr rds-stress 1 .
diff --git a/rds-ping.c b/rds-ping.c
new file mode 100644
index 0000000..e9c88fc
--- /dev/null
+++ b/rds-ping.c
@@ -0,0 +1,385 @@
+/*
+ * rds-ping utility
+ *
+ * Test reachability of a remote RDS node by sending a packet to port 0.
+ *
+ * Copyright (C) 2008 Oracle. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/poll.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include "net/rds.h"
+
+#ifdef DYNAMIC_PF_RDS
+#include "pfhack.h"
+#endif
+
+#define die(fmt...) do { /* printf-style message to stderr, then exit(1); "fmt..." is a GNU named variadic parameter */ \
+ fprintf(stderr, fmt); \
+ exit(1); \
+} while (0)
+
+#define die_errno(fmt, args...) do { /* message followed by ", errno: N (text)" and a newline to stderr, then exit(1) */ \
+ fprintf(stderr, fmt ", errno: %d (%s)\n", ##args , errno,\
+ strerror(errno)); \
+ exit(1); \
+} while (0)
+
+static struct timeval opt_wait = { 1, 0 };	/* -i: interval between sends, default exactly 1s */
+static unsigned long opt_count;		/* -c: packet limit; 0 means no limit */
+static struct in_addr opt_srcaddr;	/* -I: source address; 0 means derive it from the route */
+static struct in_addr opt_dstaddr;	/* destination address (the positional argument) */
+
+/* For reasons of simplicity, RDS ping does not use a packet
+ * payload that is being echoed, the way ICMP does.
+ * Instead, we open a number of sockets on different ports, and
+ * match packet sequence numbers with ports.
+ */
+#define NSOCKETS 8
+
+struct socket {	/* state of one of the NSOCKETS ping sockets */
+ int fd;	/* descriptor returned by rds_socket() */
+ unsigned int sent_id;	/* sequence number of the last packet sent on fd */
+ struct timeval sent_ts;	/* time at which that packet was sent */
+ unsigned int nreplies;	/* reports printed for sent_id so far; non-zero triggers "DUP!" */
+};
+
+
+static int do_ping(void);
+static void report_packet(struct socket *sp, const struct timeval *now,
+ const struct in_addr *from, int err);
+static void usage(const char *complaint);
+static int rds_socket(struct in_addr *src, struct in_addr *dst);
+static int parse_timeval(const char *, struct timeval *);
+static int parse_long(const char *ptr, unsigned long *);
+static int parse_addr(const char *ptr, struct in_addr *);
+
+/* Parse the command line into the opt_* globals, then run the ping loop.
+ * The exit status is whatever do_ping() returns. */
+int
+main(int argc, char **argv)
+{
+	const char *target;
+	int opt;
+
+	for (;;) {
+		opt = getopt(argc, argv, "c:i:I:");
+		if (opt == -1)
+			break;
+		if (opt == 'c') {
+			if (!parse_long(optarg, &opt_count))
+				die("Bad packet count <%s>\n", optarg);
+		} else if (opt == 'I') {
+			if (!parse_addr(optarg, &opt_srcaddr))
+				die("Unknown source address <%s>\n", optarg);
+		} else if (opt == 'i') {
+			if (!parse_timeval(optarg, &opt_wait))
+				die("Bad wait time <%s>\n", optarg);
+		} else {
+			usage("Unknown option");
+		}
+	}
+
+	/* Exactly one positional argument must remain: the destination. */
+	if (argc - optind != 1)
+		usage("Missing destination address");
+	target = argv[optind];
+	if (!parse_addr(target, &opt_dstaddr))
+		die("Cannot parse destination address <%s>\n", target);
+	return do_ping();
+}
+
+/* returns a - b in usecs; arithmetic stays signed so that a < b gives a negative result */
+static inline long
+usec_sub(const struct timeval *a, const struct timeval *b)
+{
+	return (long) (a->tv_sec - b->tv_sec) * 1000000L + (a->tv_usec - b->tv_usec);
+}
+
+/* Ping loop: every opt_wait, send an empty datagram to port 0 of
+ * opt_dstaddr, rotating through NSOCKETS sockets, and report each reply
+ * or error. Ends after opt_count replies, or when a send is due but
+ * opt_count packets are out (0: no limit). Returns 0 if any reply came. */
+static int
+do_ping(void)
+{
+	struct sockaddr_in sin;
+	unsigned int sent = 0, recv = 0;
+	struct timeval next_ts;
+	struct socket socket[NSOCKETS] = { { 0 } };	/* a stray packet may hit a slot not yet used */
+	struct pollfd pfd[NSOCKETS];
+	int i, next = 0;
+
+	for (i = 0; i < NSOCKETS; ++i) {
+		socket[i].fd = rds_socket(&opt_srcaddr, &opt_dstaddr);
+		pfd[i].fd = socket[i].fd;
+		pfd[i].events = POLLIN;
+	}
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_addr = opt_dstaddr;
+
+	gettimeofday(&next_ts, NULL);
+	while (1) {
+		struct timeval now;
+		struct sockaddr_in from;
+		socklen_t alen;
+		long deadline;
+		int ret;
+
+		/* Fast way out - if we have received all packets, bail now.
+		 * Otherwise keep going: outstanding replies need the poll(). */
+		if (opt_count && recv >= opt_count)
+			break;
+
+		gettimeofday(&now, NULL);
+		if (timercmp(&now, &next_ts, >=)) {
+			struct socket *sp = &socket[next];
+			int err = 0;
+
+			if (opt_count && sent >= opt_count)
+				break;
+
+			timeradd(&next_ts, &opt_wait, &next_ts);
+			if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)) < 0)
+				err = errno;	/* sendto() signals failure with -1 only */
+			sp->sent_id = ++sent;
+			sp->sent_ts = now;
+			sp->nreplies = 0;
+			next = (next + 1) % NSOCKETS;
+
+			if (err) {
+				static unsigned int nerrs = 0;
+
+				report_packet(sp, NULL, NULL, err);
+				if (err == EINVAL && nerrs++ == 0)
+					printf(" Maybe your kernel does not support rds ping yet\n");
+			}
+		}
+
+		/* Wait for replies until the next send is due. If we are already
+		 * late, never pass a negative timeout: poll() would block forever. */
+		deadline = usec_sub(&next_ts, &now);
+		ret = poll(pfd, NSOCKETS, deadline > 0 ? deadline / 1000 : 0);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			die_errno("poll");
+		}
+		if (ret == 0)
+			continue;
+
+		for (i = 0; i < NSOCKETS; ++i) {
+			struct socket *sp = &socket[i];
+
+			if (!(pfd[i].revents & POLLIN))
+				continue;
+			alen = sizeof(from);	/* value-result argument: reset before every call */
+			ret = recvfrom(sp->fd, NULL, 0, MSG_DONTWAIT,
+					(struct sockaddr *) &from, &alen);
+			gettimeofday(&now, NULL);
+
+			if (ret < 0) {
+				if (errno != EAGAIN && errno != EINTR)
+					report_packet(sp, &now, NULL, errno);
+			} else {
+				report_packet(sp, &now, &from.sin_addr, 0);
+				recv++;
+			}
+		}
+	}
+
+	/* Program exit code: signal success if we received any response. */
+	return recv == 0;
+}
+
+/* Print one result line for sp (id, RTT if now is set, unexpected sender,
+ * "DUP!" on a repeated report, error text) and count it in sp->nreplies. */
+static void
+report_packet(struct socket *sp, const struct timeval *now,
+		const struct in_addr *from_addr, int err)
+{
+	printf(" %3u:", sp->sent_id);
+	if (now != NULL)
+		printf(" %ld usec", usec_sub(now, &sp->sent_ts));
+	if (from_addr != NULL && from_addr->s_addr != opt_dstaddr.s_addr)
+		printf(" (%s)", inet_ntoa(*from_addr));
+	if (sp->nreplies++ != 0)
+		fputs(" DUP!", stdout);
+	if (err != 0)
+		printf(" ERROR: %s", strerror(err));
+	putchar('\n');
+}
+
+/* Create an RDS socket and bind it to address *src, port 0.
+ * If *src is still zero it is first filled in with the local address
+ * that a UDP socket connected to *dst reports via getsockname().
+ * Every failure is fatal (die_errno); the returned descriptor is valid. */
+static int
+rds_socket(struct in_addr *src, struct in_addr *dst)
+{
+	struct sockaddr_in addr;
+	int rds_fd;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+
+	rds_fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	if (rds_fd < 0)
+		die_errno("unable to create RDS socket");
+
+	if (src->s_addr == 0) {
+		/* No source address given: guess it from a throw-away UDP socket. */
+		socklen_t addrlen = sizeof(addr);
+		int probe_fd = socket(PF_INET, SOCK_DGRAM, 0);
+
+		if (probe_fd < 0)
+			die_errno("unable to create UDP socket");
+		addr.sin_port = htons(1);
+		addr.sin_addr = *dst;
+		if (connect(probe_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0)
+			die_errno("unable to connect to %s", inet_ntoa(*dst));
+		if (getsockname(probe_fd, (struct sockaddr *) &addr, &addrlen) < 0)
+			die_errno("getsockname failed");
+
+		*src = addr.sin_addr;
+		close(probe_fd);
+	}
+
+	addr.sin_port = 0;
+	addr.sin_addr = *src;
+
+	if (bind(rds_fd, (struct sockaddr *) &addr, sizeof(addr)) != 0)
+		die_errno("bind() failed");
+
+	return rds_fd;
+}
+
+/* Print complaint plus a summary of all options to stderr, then exit(1). */
+static void
+usage(const char *complaint)
+{
+	fprintf(stderr, "%s\nUsage: rds-ping [options] dst_addr\n"
+		"Options:\n"
+		" -c count limit packet count\n"
+		" -i interval time between packets in seconds (ms/us suffix allowed)\n"
+		" -I interface source IP address\n", complaint);
+	exit(1);
+}
+
+/* Parse "<number>[ms|msec|us|usec]" (no suffix: seconds) into *ret; 1 if valid, else 0. */
+static int
+parse_timeval(const char *ptr, struct timeval *ret)
+{
+	char *endptr;
+	double seconds = strtod(ptr, &endptr);
+	long long usecs;
+
+	if (endptr == ptr)		/* nothing converted, e.g. "" or "ms" */
+		return 0;
+	if (!strcmp(endptr, "ms") || !strcmp(endptr, "msec"))
+		seconds *= 1e-3;
+	else if (!strcmp(endptr, "us") || !strcmp(endptr, "usec"))
+		seconds *= 1e-6;
+	else if (*endptr)
+		return 0;
+	if (!(seconds >= 0 && seconds < 1e6))	/* negative, NaN, inf, or over ~11 days */
+		return 0;
+	usecs = (long long) (seconds * 1e6 + 0.5);	/* round: 3e-6 * 1e6 may land just below 3 */
+	ret->tv_sec = usecs / 1000000;
+	ret->tv_usec = usecs % 1000000;
+	return 1;
+}
+
+/* Parse an unsigned count with an optional k/m/g (2^10/2^20/2^30) suffix.
+ * Returns 1 and stores the value in *ret; returns 0 if the input is empty,
+ * negative, has trailing garbage, or does not fit in an unsigned long. */
+static int
+parse_long(const char *ptr, unsigned long *ret)
+{
+	unsigned long long val;
+	unsigned int shift = 0;
+	char *endptr;
+
+	errno = 0;
+	val = strtoull(ptr, &endptr, 0);
+	/* strtoull() accepts and negates "-1", so reject any minus sign here */
+	if (endptr == ptr || errno == ERANGE || strchr(ptr, '-'))
+		return 0;
+	switch (*endptr) {
+	case 'k': case 'K': shift = 10; break;
+	case 'm': case 'M': shift = 20; break;
+	case 'g': case 'G': shift = 30; break;
+	}
+	if (shift)
+		endptr++;
+	if (*endptr || (val << shift) >> shift != val)	/* junk, or the shift overflows */
+		return 0;
+	val <<= shift;
+	if ((unsigned long) val != val)		/* does not fit a 32-bit long */
+		return 0;
+	*ret = (unsigned long) val;
+	return 1;
+}
+
+/* Look up ptr with gethostbyname() and copy its first address into *ret.
+ * Returns 1 on success, 0 if the lookup fails or the result is not IPv4. */
+static int
+parse_addr(const char *ptr, struct in_addr *ret)
+{
+	struct hostent *host = gethostbyname(ptr);
+
+	if (host == NULL)
+		return 0;
+	if (host->h_addrtype != AF_INET || host->h_length != sizeof(*ret))
+		return 0;
+	memcpy(ret, host->h_addr_list[0], sizeof(*ret));
+	return 1;
+}
+
+/*
+ * These are completely stupid. options.c should be removed.
+ */
+void print_usage(int durr) { }	/* empty stub */
+void print_version() { }	/* empty stub */
--
1.5.4.rc3
--
Olaf Kirch | --- o --- Nous sommes du soleil we love when we play
okir at lst.de | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax
More information about the general
mailing list