[ofa-general] ***SPAM*** Re: [PATCH 8/8]: rds-tools: add new rds-ping utility

Olaf Kirch okir at lst.de
Thu Apr 24 02:14:03 PDT 2008


From 01d43fd80fe8ca463ec01c073bf3d8c03c7daa26 Mon Sep 17 00:00:00 2001
From: Olaf Kirch <olaf.kirch at oracle.com>
Date: Thu, 24 Apr 2008 00:49:37 -0700
Subject: [PATCH] Add new rds-ping utility

This adds a new utility that acts a lot like the traditional ping
command, but uses RDS instead of ICMP. Its main purpose is to have
a simple tool to check the reachability of remote nodes.

The required kernel patch is posted separately.

Signed-off-by: Olaf Kirch <olaf.kirch at oracle.com>
---
 Makefile.in |    3 +-
 rds-ping.1  |   69 +++++++++++
 rds-ping.c  |  385 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 456 insertions(+), 1 deletions(-)
 create mode 100644 rds-ping.1
 create mode 100644 rds-ping.c

diff --git a/Makefile.in b/Makefile.in
index 7cad5f1..363bb58 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -24,7 +24,7 @@ else
 COMMON_OBJECTS = $(subst .c,.o,$(filter-out pfhack.c,$(COMMON_SOURCES)))
 endif
 
-PROGRAMS = rds-gen rds-sink rds-info rds-stress
+PROGRAMS = rds-gen rds-sink rds-info rds-stress rds-ping
 
 all-programs: $(PROGRAMS)
 
@@ -65,6 +65,7 @@ EXTRA_DIST := 	rds-info.1 \
 		rds-gen.1 \
 		rds-sink.1 \
 		rds-stress.1 \
+		rds-ping.1 \
 		rds.7 \
 		rds-rdma.7 \
 		Makefile.in \
diff --git a/rds-ping.1 b/rds-ping.1
new file mode 100644
index 0000000..ae06787
--- /dev/null
+++ b/rds-ping.1
@@ -0,0 +1,69 @@
+.Dd Apr 22, 2008
+.Dt RDS-PING 1
+.Os
+.Sh NAME
+.Nm rds-ping
+.Nd test reachability of remote node over RDS
+.Pp
+.Sh SYNOPSIS
+.Nm rds-ping
+.Bk -words
+.Op Fl c Ar count
+.Op Fl i Ar interval
+.Op Fl I Ar local_addr
+.Ar remote_addr
+
+.Sh DESCRIPTION
+.Nm rds-ping
+is used to test whether a remote node is reachable over RDS.
+Its interface is designed to operate pretty much like the standard
+.Xr ping 8
+utility, even though the way it works is pretty different.
+.Pp
+.Nm rds-ping
+opens several RDS sockets and sends packets to port 0 on
+the indicated host. This is a special port number to which
+no socket is bound; instead, the kernel processes incoming
+packets and responds to them.
+.Sh OPTIONS
+The following options are available for use on the command line:
+.Bl -tag -width Ds
+.It Fl c Ar count
+Causes
+.Nm rds-ping
+to exit after sending (and receiving) the specified number of
+packets.
+.It Fl I Ar address
+By default,
+.Nm rds-ping
+will pick the local source address for the RDS socket based
+on routing information for the destination address (i.e. if
+packets to the given destination would be routed through interface
+.Nm ib0 ,
+then it will use the IP address of
+.Nm ib0
+as source address).
+Using the
+.Fl I
+option, you can override this choice.
+.It Fl i Ar timeout
+By default,
+.Nm rds-ping
+will wait for one second between sending packets. Use this option
+to specify a different interval. The timeout value is given in
+seconds, and can be a floating point number. Optionally, append
+.Nm msec
+or
+.Nm usec
+to specify a timeout in milliseconds or microseconds, respectively.
+.Pp
+Specifying a timeout considerably smaller than the packet round-trip
+time will produce unexpected results.
+.El
+.Sh AUTHORS
+.Nm rds-ping
+was written by Olaf Kirch <olaf.kirch at oracle.com>.
+.Sh SEE ALSO
+.Xr rds 7 ,
+.Xr rds-info 1 ,
+.Xr rds-stress 1 .
diff --git a/rds-ping.c b/rds-ping.c
new file mode 100644
index 0000000..e9c88fc
--- /dev/null
+++ b/rds-ping.c
@@ -0,0 +1,385 @@
+/*
+ * rds-ping utility
+ *
+ * Test reachability of a remote RDS node by sending a packet to port 0.
+ *
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/poll.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include "net/rds.h"
+
+#ifdef DYNAMIC_PF_RDS
+#include "pfhack.h"
+#endif
+/* Fatal-error helpers: print a printf-style message to stderr and exit(1); die_errno() also appends errno and its text. */
+#define die(fmt...) do {		\
+	fprintf(stderr, fmt);		\
+	exit(1);			\
+} while (0)
+
+#define die_errno(fmt, args...) do {				\
+	fprintf(stderr, fmt ", errno: %d (%s)\n", ##args , errno,\
+		strerror(errno));				\
+	exit(1);						\
+} while (0)
+
+static struct timeval	opt_wait = { 1, 0 };		/* -i: send interval, default 1s */
+static unsigned long	opt_count;			/* -c: packet limit, 0 = no limit */
+static struct in_addr	opt_srcaddr;			/* -I: source address, 0 = guess from route */
+static struct in_addr	opt_dstaddr;			/* destination address (required argument) */
+
+/* For reasons of simplicity, RDS ping does not use a packet
+ * payload that is being echoed, the way ICMP does.
+ * Instead, we open a number of sockets on different ports, and
+ * match packet sequence numbers with ports.
+ */
+#define NSOCKETS	8
+
+struct socket {
+	int fd;				/* RDS socket returned by rds_socket() */
+	unsigned int sent_id;		/* sequence number of the last ping sent on fd */
+	struct timeval sent_ts;		/* time at which that ping was sent */
+	unsigned int nreplies;		/* lines reported for sent_id so far (reset on each send) */
+};
+
+
+static int	do_ping(void);
+static void	report_packet(struct socket *sp, const struct timeval *now,
+			const struct in_addr *from, int err);
+static void	usage(const char *complaint);
+static int	rds_socket(struct in_addr *src, struct in_addr *dst);
+static int	parse_timeval(const char *, struct timeval *);
+static int	parse_long(const char *ptr, unsigned long *);
+static int	parse_addr(const char *ptr, struct in_addr *);
+/* Parse the command line into the opt_* globals and run do_ping(); its return value is the exit status. */
+int
+main(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "c:i:I:")) != -1) {
+		switch (c) {
+		case 'c':
+			if (!parse_long(optarg, &opt_count))
+				die("Bad packet count <%s>\n", optarg);
+			break;
+
+		case 'I':
+			if (!parse_addr(optarg, &opt_srcaddr))
+				die("Unknown source address <%s>\n", optarg);
+			break;
+
+		case 'i':
+			if (!parse_timeval(optarg, &opt_wait))
+				die("Bad wait time <%s>\n", optarg);
+			break;
+
+		default:
+			usage("Unknown option");	/* usage() exits, so no break is needed */
+		}
+	}
+
+	if (optind + 1 != argc)		/* exactly one positional argument; also taken when extra arguments are given */
+		usage("Missing destination address");
+	if (!parse_addr(argv[optind], &opt_dstaddr))
+		die("Cannot parse destination address <%s>\n", argv[optind]);
+
+	return do_ping();
+}
+
+/* returns a - b in usecs; negative when a is earlier than b */
+static inline long
+usec_sub(const struct timeval *a, const struct timeval *b)
+{
+	return (long)(a->tv_sec - b->tv_sec) * 1000000L + (a->tv_usec - b->tv_usec);
+}
+
+/* Ping opt_dstaddr every opt_wait, rotating through NSOCKETS sockets, and
+ * report each reply or error.  Stops after opt_count replies, or when a send
+ * falls due after opt_count pings were sent (0 = no limit).
+ * Returns 0 if any reply was received, 1 otherwise.
+ */
+static int
+do_ping(void)
+{
+	struct sockaddr_in sin;
+	unsigned int	sent = 0, received = 0;
+	struct timeval	next_ts;
+	struct socket	socks[NSOCKETS];
+	struct pollfd	pfd[NSOCKETS];
+	int		i, next = 0;
+
+	memset(socks, 0, sizeof(socks));	/* no indeterminate state for report_packet() */
+	for (i = 0; i < NSOCKETS; ++i) {
+		socks[i].fd = pfd[i].fd = rds_socket(&opt_srcaddr, &opt_dstaddr);
+		pfd[i].events = POLLIN;
+	}
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_addr = opt_dstaddr;
+
+	gettimeofday(&next_ts, NULL);
+	while (1) {
+		struct timeval	now;
+		struct sockaddr_in from;
+		socklen_t	alen;
+		long		deadline;
+		int		ret;
+
+		/* Fast way out - if we have received all packets, bail now.
+		 * If we're still waiting for some to come back, we need
+		 * to do the poll() below */
+		if (opt_count && received >= opt_count)
+			break;
+
+		gettimeofday(&now, NULL);
+		if (timercmp(&now, &next_ts, >=)) {
+			struct socket *sp = &socks[next];
+			int err = 0;
+
+			if (opt_count && sent >= opt_count)
+				break;
+
+			timeradd(&next_ts, &opt_wait, &next_ts);
+			if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)) < 0)
+				err = errno;
+			sp->sent_id = ++sent;
+			sp->sent_ts = now;
+			sp->nreplies = 0;
+			next = (next + 1) % NSOCKETS;
+
+			if (err) {
+				static unsigned int nerrs = 0;
+				report_packet(sp, NULL, NULL, err);
+				if (err == EINVAL && nerrs++ == 0)
+					printf("      Maybe your kernel does not support rds ping yet\n");
+			}
+		}
+
+		/* poll() treats a negative timeout as "wait forever", so never
+		 * pass one when we have fallen behind the send schedule. */
+		deadline = usec_sub(&next_ts, &now);
+		ret = poll(pfd, NSOCKETS, deadline > 0 ? deadline / 1000 : 0);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			die_errno("poll");
+		}
+		if (ret == 0)
+			continue;
+
+		for (i = 0; i < NSOCKETS; ++i) {
+			struct socket *sp = &socks[i];
+
+			if (!(pfd[i].revents & POLLIN))
+				continue;
+
+			alen = sizeof(from);	/* value-result: reset before each call */
+			ret = recvfrom(sp->fd, NULL, 0, MSG_DONTWAIT,
+					(struct sockaddr *) &from, &alen);
+			gettimeofday(&now, NULL);
+			if (ret >= 0) {
+				report_packet(sp, &now, &from.sin_addr, 0);
+				received++;
+			} else if (errno != EAGAIN && errno != EINTR) {
+				report_packet(sp, &now, NULL, errno);
+			}
+		}
+	}
+
+	/* Program exit code: signal success if we received any response. */
+	return received == 0;
+}
+/* Print one result line for the ping last sent on sp and count it in sp->nreplies. */
+static void
+report_packet(struct socket *sp, const struct timeval *now,
+		const struct in_addr *from_addr, int err)
+{
+	printf(" %3u:", sp->sent_id);
+	if (now)			/* NULL when a send error is being reported */
+		printf(" %ld usec", usec_sub(now, &sp->sent_ts));
+	if (from_addr && from_addr->s_addr != opt_dstaddr.s_addr)	/* sender differs from the pinged address */
+		printf(" (%s)", inet_ntoa(*from_addr));
+	if (sp->nreplies)		/* something was already reported for this sent_id */
+		printf(" DUP!");
+	if (err)
+		printf(" ERROR: %s", strerror(err));
+	printf("\n");
+
+	sp->nreplies++;
+}
+/* Create an RDS socket bound to *src, guessing *src from the route to *dst when it is 0; exits on any failure. */
+static int
+rds_socket(struct in_addr *src, struct in_addr *dst)
+{
+	struct sockaddr_in sin;
+	int fd;
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+
+	fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	if (fd < 0)
+		die_errno("unable to create RDS socket");
+
+	/* Guess the local source addr if not given. */
+	if (src->s_addr == 0) {
+		socklen_t alen;
+		int ufd;
+
+		ufd = socket(PF_INET, SOCK_DGRAM, 0);
+		if (ufd < 0)
+			die_errno("unable to create UDP socket");
+		sin.sin_addr = *dst;
+		sin.sin_port = htons(1);	/* port value looks arbitrary; ufd is only used for getsockname() below */
+		if (connect(ufd, (struct sockaddr *) &sin, sizeof(sin)) < 0)
+			die_errno("unable to connect to %s",
+					inet_ntoa(*dst));
+
+		alen = sizeof(sin);
+		if (getsockname(ufd, (struct sockaddr *) &sin, &alen) < 0)
+			die_errno("getsockname failed");
+
+		*src = sin.sin_addr;		/* stored through src, so later calls skip the guess */
+		close(ufd);
+	}
+
+	sin.sin_addr = *src;
+	sin.sin_port = 0;			/* presumably lets the kernel pick the port - TODO confirm */
+
+	if (bind(fd, (struct sockaddr *) &sin, sizeof(sin)))
+		die_errno("bind() failed");
+
+	return fd;
+}
+
+/* Print complaint followed by the option summary to stderr, then exit(1). */
+static void
+usage(const char *complaint)
+{
+	fprintf(stderr,
+		"%s\nUsage: rds-ping [options] dst_addr\nOptions:\n"
+		" -c count      limit packet count\n"
+		" -i interval   wait between packets in seconds (suffix msec/usec allowed)\n"
+		" -I interface  source IP address\n", complaint);
+	exit(1);
+}
+
+/* Parse "<number>[ms|msec|us|usec]" (bare number = seconds) into *ret.
+ * Returns 1 on success, 0 for a missing, negative, huge or NaN number or a bad suffix. */
+static int
+parse_timeval(const char *ptr, struct timeval *ret)
+{
+	double	seconds;
+	char *endptr;
+
+	seconds = strtod(ptr, &endptr);
+	if (endptr == ptr)		/* no number at all, e.g. "-i ms" */
+		return 0;
+	if (!strcmp(endptr, "ms") || !strcmp(endptr, "msec"))
+		seconds *= 1e-3;
+	else if (!strcmp(endptr, "us") || !strcmp(endptr, "usec"))
+		seconds *= 1e-6;
+	else if (*endptr)
+		return 0;
+	if (!(seconds >= 0 && seconds <= 1e9))	/* keeps the cast to long in range; also rejects NaN */
+		return 0;
+	ret->tv_sec = (long) seconds;
+	ret->tv_usec = (long) ((seconds - ret->tv_sec) * 1e6);
+	return 1;
+}
+
+/* Parse an unsigned count with an optional k/m/g (x2^10, x2^20, x2^30) suffix
+ * into *ret.  Returns 1 on success, 0 on a missing or negative number,
+ * trailing junk, or a value that does not fit in unsigned long. */
+static int
+parse_long(const char *ptr, unsigned long *ret)
+{
+	unsigned long long val, scaled;
+	unsigned int shift = 0;
+	char *endptr;
+
+	/* strtoull() silently negates "-N", so refuse a sign up front. */
+	if (strchr(ptr, '-'))
+		return 0;
+	errno = 0;
+	val = strtoull(ptr, &endptr, 0);
+	if (endptr == ptr || errno == ERANGE)
+		return 0;
+	switch (*endptr) {
+	case 'k': case 'K': shift = 10; endptr++; break;
+	case 'm': case 'M': shift = 20; endptr++; break;
+	case 'g': case 'G': shift = 30; endptr++; break;
+	}
+	scaled = val << shift;
+	/* Fail on trailing junk, shift overflow, or truncation by *ret's type. */
+	if (*endptr || (scaled >> shift) != val || (unsigned long) scaled != scaled)
+		return 0;
+
+	*ret = (unsigned long) scaled;
+	return 1;
+}
+/* Resolve ptr with gethostbyname() into an IPv4 address in *ret; returns 1 on success, 0 otherwise. */
+static int
+parse_addr(const char *ptr, struct in_addr *ret)
+{
+        struct hostent *hent;
+
+        hent = gethostbyname(ptr);
+        if (hent &&
+            hent->h_addrtype == AF_INET && hent->h_length == sizeof(*ret)) {	/* accept IPv4 results only */
+		memcpy(ret, hent->h_addr, sizeof(*ret));	/* h_addr: first address in the result */
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Empty stubs, presumably needed only because options.c references them.
+ * These are completely stupid.  options.c should be removed.
+ */
+void print_usage(int durr) { }
+void print_version() { }
-- 
1.5.4.rc3



-- 
Olaf Kirch  |  --- o --- Nous sommes du soleil we love when we play
okir at lst.de |    / | \   sol.dhoop.naytheet.ah kin.ir.samse.qurax



More information about the general mailing list