[ofa-general] [PATCH] [RFC] librdmacm: add rdma_migrate_id
Sean Hefty
sean.hefty at intel.com
Wed Nov 28 16:32:31 PST 2007
This is based on user feedback from Doug Ledford at RedHat:
Events that occur on an rdma_cm_id are reported to userspace through
an event channel. Connection request events are reported
on the event channel associated with the listen. When the
connection is accepted, a new rdma_cm_id is created and automatically
uses the listen event channel. This is suboptimal where the user
only wants listen events on that channel.
Additionally, it may be desirable to have events related to
connection establishment use a different event channel than those
related to already established connections.
Allow the user to migrate an rdma_cm_id between event channels.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
I started to provide support for calling rdma_migrate_id() while the
user is polling for events or making other calls on the migrating id, but
while the complexity seemed doable, it just didn't seem justified
based on the expected usage model. I believe that the kernel interface
allows this support to be added later, if it is needed. For now, the
documentation simply states that the user can only migrate an id if they
are not processing events on the current event channel and not invoking
another call on that id simultaneously.
Makefile.am | 1 +
examples/cmatose.c | 59 +++++++++++++++++++++++++++++++++++++++----
include/rdma/rdma_cma.h | 7 +++++
include/rdma/rdma_cma_abi.h | 13 +++++++++
man/rdma_migrate_id.3 | 27 ++++++++++++++++++++
man/ucmatose.1 | 4 +++
src/cma.c | 35 ++++++++++++++++++++++++++
src/librdmacm.map | 1 +
8 files changed, 140 insertions(+), 7 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index 77782da..290cbc3 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -54,6 +54,7 @@ man_MANS = \
man/rdma_join_multicast.3 \
man/rdma_leave_multicast.3 \
man/rdma_listen.3 \
+ man/rdma_migrate_id.3 \
man/rdma_notify.3 \
man/rdma_reject.3 \
man/rdma_resolve_addr.3 \
diff --git a/examples/cmatose.c b/examples/cmatose.c
index dcb6074..2f6e5f6 100644
--- a/examples/cmatose.c
+++ b/examples/cmatose.c
@@ -82,6 +82,7 @@ static int message_size = 100;
static int message_count = 10;
static uint8_t set_tos = 0;
static uint8_t tos;
+static uint8_t migrate = 0;
static char *dst_addr;
static char *src_addr;
@@ -465,6 +466,35 @@ static int disconnect_events(void)
return ret;
}
+/*
+ * Create a fresh event channel and move the listen id (when one is given)
+ * and every connection id in test.nodes[] onto it.  On success the
+ * previous test.channel is destroyed and replaced by the new channel.
+ * Returns 0 on success, -1 if the channel cannot be created, or the first
+ * non-zero value returned by rdma_migrate_id().
+ */
+static int migrate_channel(struct rdma_cm_id *listen_id)
+{
+	struct rdma_event_channel *new_channel;
+	int idx;
+	int err = 0;
+
+	printf("migrating to new event channel\n");
+
+	new_channel = rdma_create_event_channel();
+	if (new_channel == NULL) {
+		printf("cmatose: failed to create event channel\n");
+		return -1;
+	}
+
+	if (listen_id != NULL)
+		err = rdma_migrate_id(listen_id, new_channel);
+
+	/* Stop at the first id that fails to migrate. */
+	for (idx = 0; idx < connections; idx++) {
+		if (err)
+			break;
+		err = rdma_migrate_id(test.nodes[idx].cma_id, new_channel);
+	}
+
+	if (err) {
+		printf("cmatose: failure migrating to channel: %d\n", err);
+		return err;
+	}
+
+	/* Every id now reports on the new channel; retire the old one. */
+	rdma_destroy_event_channel(test.channel);
+	test.channel = new_channel;
+	return 0;
+}
+
static int get_addr(char *dst, struct sockaddr_in *addr)
{
struct addrinfo *res;
@@ -543,6 +573,13 @@ static int run_server(void)
printf("data transfers complete\n");
}
+
+ if (migrate) {
+ ret = migrate_channel(listen_id);
+ if (ret)
+ goto out;
+ }
+
printf("cmatose: disconnecting\n");
for (i = 0; i < connections; i++) {
if (!test.nodes[i].connected)
@@ -592,30 +629,36 @@ static int run_client(void)
ret = connect_events();
if (ret)
- goto out;
+ goto disc;
if (message_count) {
printf("receiving data transfers\n");
ret = poll_cqs();
if (ret)
- goto out;
+ goto disc;
printf("sending replies\n");
for (i = 0; i < connections; i++) {
ret = post_sends(&test.nodes[i]);
if (ret)
- goto out;
+ goto disc;
}
printf("data transfers complete\n");
}
ret = 0;
-out:
+
+ if (migrate) {
+ ret = migrate_channel(NULL);
+ if (ret)
+ goto out;
+ }
+disc:
ret2 = disconnect_events();
if (ret2)
ret = ret2;
-
+out:
return ret;
}
@@ -623,7 +666,7 @@ int main(int argc, char **argv)
{
int op, ret;
- while ((op = getopt(argc, argv, "s:b:c:C:S:t:")) != -1) {
+ while ((op = getopt(argc, argv, "s:b:c:C:S:t:m")) != -1) {
switch (op) {
case 's':
dst_addr = optarg;
@@ -644,6 +687,9 @@ int main(int argc, char **argv)
set_tos = 1;
tos = (uint8_t) atoi(optarg);
break;
+ case 'm':
+ migrate = 1;
+ break;
default:
printf("usage: %s\n", argv[0]);
printf("\t[-s server_address]\n");
@@ -652,6 +698,7 @@ int main(int argc, char **argv)
printf("\t[-C message_count]\n");
printf("\t[-S message_size]\n");
printf("\t[-t type_of_service]\n");
+ printf("\t[-m(igrate)]\n");
exit(1);
}
}
diff --git a/include/rdma/rdma_cma.h b/include/rdma/rdma_cma.h
index 64b3008..76df90f 100644
--- a/include/rdma/rdma_cma.h
+++ b/include/rdma/rdma_cma.h
@@ -560,6 +560,13 @@ enum {
int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
void *optval, size_t optlen);
+/**
+ * rdma_migrate_id - Move an rdma_cm_id to a new event channel.
+ * @id: Communication identifier to migrate.
+ * @channel: New event channel for rdma_cm_id events.
+ * Description:
+ *   Migrates the rdma_cm_id to the given event channel; pending events
+ *   associated with the id are moved to the new channel.  Users should
+ *   not poll for events on the id's current event channel or invoke
+ *   other routines on the id while it is being migrated.  The call
+ *   blocks while there are unacknowledged events for the id on the
+ *   current event channel.  Returns 0 on success and a negative value
+ *   on failure.
+ */
+int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel);
+
#ifdef __cplusplus
}
#endif
diff --git a/include/rdma/rdma_cma_abi.h b/include/rdma/rdma_cma_abi.h
index ba0e7b9..1a3a9c2 100644
--- a/include/rdma/rdma_cma_abi.h
+++ b/include/rdma/rdma_cma_abi.h
@@ -63,7 +63,8 @@ enum {
UCMA_CMD_SET_OPTION,
UCMA_CMD_NOTIFY,
UCMA_CMD_JOIN_MCAST,
- UCMA_CMD_LEAVE_MCAST
+ UCMA_CMD_LEAVE_MCAST,
+ UCMA_CMD_MIGRATE_ID
};
struct ucma_abi_cmd_hdr {
@@ -221,4 +222,14 @@ struct ucma_abi_set_option {
__u32 optlen;
};
+struct ucma_abi_migrate_id { /* command body sent with UCMA_CMD_MIGRATE_ID */
+ __u64 response; /* set up by CMA_CREATE_MSG_CMD_RESP; presumably the user address of the response buffer -- TODO confirm */
+ __u32 id; /* handle of the rdma_cm_id being migrated */
+ __u32 fd; /* fd of the event channel the id is currently using */
+};
+
+struct ucma_abi_migrate_resp { /* response to UCMA_CMD_MIGRATE_ID */
+ __u32 events_reported; /* rdma_migrate_id() waits until the id's events_completed count reaches this value */
+};
+
#endif /* RDMA_CMA_ABI_H */
diff --git a/man/rdma_migrate_id.3 b/man/rdma_migrate_id.3
new file mode 100644
index 0000000..006fb61
--- /dev/null
+++ b/man/rdma_migrate_id.3
@@ -0,0 +1,27 @@
+.TH "RDMA_MIGRATE_ID" 3 "2007-11-13" "librdmacm" "Librdmacm Programmer's Manual" librdmacm
+.SH NAME
+rdma_migrate_id \- Move a communication identifier to a different event channel.
+.SH SYNOPSIS
+.B "#include <rdma/rdma_cma.h>"
+.P
+.B "int" rdma_migrate_id
+.BI "(struct rdma_cm_id *" id ","
+.BI "struct rdma_event_channel *" channel ");"
+.SH ARGUMENTS
+.IP "id" 12
+An existing communication identifier to migrate.
+.IP "channel" 12
+The new event channel that events associated with the
+migrated rdma_cm_id will be reported on.
+.SH "DESCRIPTION"
+Migrates a communication identifier to a different event channel.
+.SH "NOTES"
+This routine migrates a communication identifier to the specified event
+channel and moves any pending events associated with the rdma_cm_id
+to the new channel. Users should not poll for events on the
+rdma_cm_id's current event channel or invoke other routines on the
+rdma_cm_id while migrating between channels. This call will block while
+there are any unacknowledged events on the current event channel.
+.SH "SEE ALSO"
+rdma_cm(7), rdma_create_event_channel(3), rdma_create_id(3),
+rdma_get_cm_event(3)
\ No newline at end of file
diff --git a/man/ucmatose.1 b/man/ucmatose.1
index 121ea63..73477ea 100644
--- a/man/ucmatose.1
+++ b/man/ucmatose.1
@@ -35,6 +35,10 @@ The size of each message transferred, in bytes. (default 100)
\-t tos
Indicates the type of service used for the communication. Type of service
is implementation dependent based on subnet configuration.
+.TP
+\-m
+Tests event channel migration. Migrates all communication identifiers to
+a different event channel for disconnect events.
.SH "NOTES"
Basic usage is to start ucmatose on a server system, then run
ucmatose -s server_name on a client system.
diff --git a/src/cma.c b/src/cma.c
index 00ea394..20cfc19 100644
--- a/src/cma.c
+++ b/src/cma.c
@@ -1471,3 +1471,38 @@ int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
return 0;
}
+
+/*
+ * rdma_migrate_id - move an rdma_cm_id onto a different event channel.
+ * @id: communication identifier to migrate.
+ * @channel: event channel that should report the id's events from now on.
+ *
+ * Issues UCMA_CMD_MIGRATE_ID on the new channel's fd, passing the id's
+ * handle and the fd of the channel the id currently uses, then points
+ * id->channel at the new channel.  The caller must not be polling the old
+ * channel or making other calls on this id concurrently.
+ *
+ * Returns 0 on success, -ENODATA if the command was not written in full,
+ * or the negative value returned by write().
+ */
+int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel)
+{
+	struct ucma_abi_migrate_resp *resp;
+	struct ucma_abi_migrate_id *cmd;
+	struct cma_id_private *id_priv;
+	void *msg;
+	int ret, size;
+
+	id_priv = container_of(id, struct cma_id_private, id);
+	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_MIGRATE_ID, size);
+	cmd->id = id_priv->handle;
+	/* The fd in the command names the channel being migrated away from. */
+	cmd->fd = id->channel->fd;
+
+	/* The command itself is written to the destination channel. */
+	ret = write(channel->fd, msg, size);
+	/*
+	 * Any short write, including a zero-byte one, means the command was
+	 * not delivered and resp was not filled in; never report it as
+	 * success.
+	 */
+	if (ret != size)
+		return (ret >= 0) ? -ENODATA : ret;
+
+	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
+
+	/*
+	 * Switch the id to the new channel, then wait until the id's
+	 * completed-event count has caught up with the number of events
+	 * reported in the response.  This keeps the user from still
+	 * processing events for this id on the old channel after this call
+	 * returns.  Full support for migrating while events are being
+	 * processed concurrently is not provided here.
+	 */
+	pthread_mutex_lock(&id_priv->mut);
+	id->channel = channel;
+	while (id_priv->events_completed < resp->events_reported)
+		pthread_cond_wait(&id_priv->cond, &id_priv->mut);
+	pthread_mutex_unlock(&id_priv->mut);
+
+	return 0;
+}
diff --git a/src/librdmacm.map b/src/librdmacm.map
index 84cec7f..cb94efe 100644
--- a/src/librdmacm.map
+++ b/src/librdmacm.map
@@ -27,5 +27,6 @@ RDMACM_1.0 {
rdma_set_option;
rdma_get_local_addr;
rdma_get_peer_addr;
+ rdma_migrate_id;
local: *;
};
More information about the general
mailing list