[openib-general] [PATCH] [CM] 1/5 per device communication identifiers

Sean Hefty sean.hefty at intel.com
Thu Sep 15 10:35:48 PDT 2005


The following patch binds each communication identifier to a specific device.
It exports a separate CM device for each HCA to userspace.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>


Index: userspace/libibcm/include/infiniband/cm_abi.h
===================================================================
--- userspace/libibcm/include/infiniband/cm_abi.h	(revision 3433)
+++ userspace/libibcm/include/infiniband/cm_abi.h	(working copy)
@@ -42,7 +42,8 @@
  * drivers/infiniband/include/ib_user_cm.h
  */
 
-#define IB_USER_CM_ABI_VERSION 2
+#define IB_USER_CM_MIN_ABI_VERSION	3
+#define IB_USER_CM_MAX_ABI_VERSION	3
 
 enum {
 	IB_USER_CM_CMD_CREATE_ID,
@@ -303,8 +304,6 @@ struct cm_abi_event_get {
 };
 
 struct cm_abi_req_event_resp {
-	/* device */
-	/* port */
 	struct cm_abi_path_rec primary_path;
 	struct cm_abi_path_rec alternate_path;
 	__u64                  remote_ca_guid;
@@ -320,6 +319,7 @@ struct cm_abi_req_event_resp {
 	__u8  retry_count;
 	__u8  rnr_retry_count;
 	__u8  srq;
+	__u8  port;
 };
 
 struct cm_abi_rep_event_resp {
@@ -357,10 +357,9 @@ struct cm_abi_apr_event_resp {
 };
 
 struct cm_abi_sidr_req_event_resp {
-	/* device */
-	/* port */
 	__u16 pkey;
-	__u8  reserved[2];
+	__u8  port;
+	__u8  reserved;
 };
 
 struct cm_abi_sidr_rep_event_resp {
Index: userspace/libibcm/include/infiniband/cm.h
===================================================================
--- userspace/libibcm/include/infiniband/cm.h	(revision 3433)
+++ userspace/libibcm/include/infiniband/cm.h	(working copy)
@@ -77,13 +77,21 @@ enum ib_cm_data_size {
 	IB_CM_SIDR_REP_INFO_LENGTH	 = 72
 };
 
+struct ib_cm_device {
+	uint64_t guid;
+	int	 fd;
+};
+
 struct ib_cm_id {
 	void			*context;
+	struct ibv_context	*device_context;
+	struct ib_cm_device	*device;
 	uint32_t		handle;
 };
 
 struct ib_cm_req_event_param {
 	struct ib_cm_id		*listen_id;
+	uint8_t			port;
 
 	struct ib_sa_path_rec	*primary_path;
 	struct ib_sa_path_rec	*alternate_path;
@@ -193,7 +201,6 @@ struct ib_cm_apr_event_param {
 
 struct ib_cm_sidr_req_event_param {
 	struct ib_cm_id	 *listen_id;
-	struct ib_device *device;
 	uint8_t		  port;
 	uint16_t          pkey;
 };
@@ -239,6 +246,7 @@ struct ib_cm_event {
 /**
  * ib_cm_get_event - Retrieves the next pending communications event,
  *   if no event is pending waits for an event.
+ * @device: CM device from which to retrieve the event.
  * @event: Allocated information about the next communication event.
  *    Event should be freed using ib_cm_ack_event()
  *
@@ -249,19 +257,7 @@ struct ib_cm_event {
  * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds
  * to a user's existing communication identifier.
  */
-int ib_cm_get_event(struct ib_cm_event **event);
-
-/**
- * ib_cm_get_event_timed - Retrieves the next pending communications event,
- *   if no event is pending wait up to a certain timeout for an event.
- * @timeout_ms: Maximum time in milliseconds to wait for an event.
- * @event: Allocated information about the next communication event.
- *    Event should be freed using ib_cm_ack_event()
- *
- * If timeout expires without an event, the error -ETIMEDOUT will be
- * returned
- */
-int ib_cm_get_event_timed(int timeout_ms, struct ib_cm_event **event);
+int ib_cm_get_event(struct ib_cm_device *device, struct ib_cm_event **event);
 
 /**
  * ib_cm_ack_event - Free a communications event.
@@ -272,19 +268,21 @@ int ib_cm_get_event_timed(int timeout_ms
  * and puts.
  */
 int ib_cm_ack_event(struct ib_cm_event *event);
-
+ 
 /**
- * ib_cm_get_fd - Returns the file descriptor which the CM uses to
- *   submit requests and retrieve events.
+ * ib_cm_get_device - Returns the CM device that corresponds to the specified
+ *   verbs device; the CM uses it to submit requests and retrieve events.
  *
- * The primary use of the file descriptor is to test for CM readiness
- * events. When the CM becomes ready to READ there is a pending event
- * ready, and a subsequent call to ib_cm_get_event will not block.
+ * The CM device contains the file descriptor that the CM uses to
+ * communicate with the kernel CM component.  The primary use of the
+ * file descriptor is to test for CM readiness events. When the CM
+ * becomes ready to READ there is a pending event ready, and a subsequent
+ * call to ib_cm_get_event will not block.
  * Note: The user should not read or write directly to the CM file
  *       descriptor, it will likely result in an error or unexpected
  *       results.
  */
-int ib_cm_get_fd(void);
+struct ib_cm_device* ib_cm_get_device(struct ibv_context *device_context);
 
 /**
  * ib_cm_create_id - Allocate a communication identifier.
@@ -292,7 +290,8 @@ int ib_cm_get_fd(void);
  * Communication identifiers are used to track connection states, service
  * ID resolution requests, and listen requests.
  */
-int ib_cm_create_id(struct ib_cm_id **cm_id, void *context);
+int ib_cm_create_id(struct ibv_context *device_context,
+		    struct ib_cm_id **cm_id, void *context);
 
 /**
  * ib_cm_destroy_id - Destroy a connection identifier.
Index: userspace/libibcm/src/cm.c
===================================================================
--- userspace/libibcm/src/cm.c	(revision 3433)
+++ userspace/libibcm/src/cm.c	(working copy)
@@ -47,12 +47,21 @@
 #include <poll.h>
 #include <unistd.h>
 #include <pthread.h>
+#include <endian.h>
+#include <byteswap.h>
 
 #include <infiniband/cm.h>
 #include <infiniband/cm_abi.h>
 
-#define IB_UCM_DEV_PATH "/dev/infiniband/ucm"
-#define PFX "libucm: "
+#define PFX "libibcm: "
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+static inline uint64_t htonll(uint64_t x) { return bswap_64(x); }
+static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); }
+#else
+static inline uint64_t htonll(uint64_t x) { return x; }
+static inline uint64_t ntohll(uint64_t x) { return x; }
+#endif
 
 #define CM_CREATE_MSG_CMD_RESP(msg, cmd, resp, type, size) \
 do {                                        \
@@ -97,19 +106,164 @@ struct cm_id_private {
 	pthread_mutex_t mut;
 };
 
-static int fd;
+static struct dlist *device_list;
 
 #define container_of(ptr, type, field) \
 	((type *) ((void *)ptr - offsetof(type, field)))
 
+static int check_abi_version(void)
+{
+	char path[256];
+	char val[16];
+	int abi_ver;
+
+	if (sysfs_get_mnt_path(path, sizeof path)) {
+		fprintf(stderr, PFX "couldn't find sysfs mount.\n");
+		return -1;
+	}
+
+	strncat(path, "/class/infiniband_cm/abi_version", sizeof path);
+	if (sysfs_read_attribute_value(path, val, sizeof val)) {
+		fprintf(stderr, PFX "couldn't read ucm ABI version.\n");
+		return -1;
+	}
+
+	abi_ver = strtol(val, NULL, 10);
+	if (abi_ver < IB_USER_CM_MIN_ABI_VERSION ||
+	    abi_ver > IB_USER_CM_MAX_ABI_VERSION) {
+		fprintf(stderr, PFX "kernel ABI version %d "
+			"doesn't match library version %d.\n",
+			abi_ver, IB_USER_CM_MAX_ABI_VERSION);
+		return -1;
+	}
+	return 0;
+}
+
+static uint64_t get_device_guid(struct sysfs_class_device *ibdev)
+{
+	struct sysfs_attribute *attr;
+	uint64_t guid = 0;
+	uint16_t parts[4];
+	int i;
+
+	attr = sysfs_get_classdev_attr(ibdev, "node_guid");
+	if (!attr)
+		return 0;
+
+	if (sscanf(attr->value, "%hx:%hx:%hx:%hx",
+		   parts, parts + 1, parts + 2, parts + 3) != 4)
+		return 0;
+
+	for (i = 0; i < 4; ++i)
+		guid = (guid << 16) | parts[i];
+
+	return htonll(guid);
+}
+
+static struct ib_cm_device* open_device(struct sysfs_class_device *cm_dev)
+{
+	struct sysfs_class_device *ib_dev;
+	struct sysfs_attribute *attr;
+	struct ib_cm_device *dev;
+	char ibdev_name[64];
+	char *devpath;
+
+	dev = malloc(sizeof *dev);
+	if (!dev)
+		return NULL;
+
+	attr = sysfs_get_classdev_attr(cm_dev, "ibdev");
+	if (!attr) {
+		fprintf(stderr, PFX "no ibdev class attr for %s\n",
+			cm_dev->name);
+		goto err;
+	}
+
+	sscanf(attr->value, "%63s", ibdev_name);
+	ib_dev = sysfs_open_class_device("infiniband", ibdev_name);
+	if (!ib_dev)
+		goto err;
+
+	dev->guid = get_device_guid(ib_dev);
+	sysfs_close_class_device(ib_dev);
+	if (!dev->guid)
+		goto err;
+
+	asprintf(&devpath, "/dev/infiniband/%s", cm_dev->name);
+	dev->fd = open(devpath, O_RDWR);
+	if (dev->fd < 0) {
+		fprintf(stderr, PFX "error <%d:%d> opening device <%s>\n",
+			dev->fd, errno, devpath);
+		goto err;
+	}
+	return dev;
+err:
+	free(dev);
+	return NULL;
+}
+
 static void __attribute__((constructor)) ib_cm_init(void)
 {
-	fd = open(IB_UCM_DEV_PATH, O_RDWR);
-        if (fd < 0)
-		fprintf(stderr, PFX
-			"Error <%d:%d> couldn't open IB cm device <%s>\n",
-			fd, errno, IB_UCM_DEV_PATH);
+	struct sysfs_class *cls;
+	struct dlist *cm_dev_list;
+	struct sysfs_class_device *cm_dev;
+	struct ib_cm_device *dev;
+
+	device_list = dlist_new(sizeof(struct ib_cm_device));
+	if (!device_list) {
+		fprintf(stderr, PFX "couldn't allocate device list.\n");
+		abort();
+	}
+
+	cls = sysfs_open_class("infiniband_cm");
+	if (!cls) {
+		fprintf(stderr, PFX "couldn't open 'infiniband_cm'.\n");
+		goto err;
+	}
+
+	if (check_abi_version())
+		goto err;
 
+	cm_dev_list = sysfs_get_class_devices(cls);
+	if (!cm_dev_list) {
+		fprintf(stderr, PFX "no class devices found.\n");
+		goto err;
+	}
+
+	dlist_for_each_data(cm_dev_list, cm_dev, struct sysfs_class_device) {
+		dev = open_device(cm_dev);
+		if (dev)
+			dlist_push(device_list, dev);
+	}
+	return;
+err:
+	sysfs_close_class(cls);
+}
+
+static void __attribute__((destructor)) ib_cm_fini(void)
+{
+	struct ib_cm_device *dev;
+
+	if (!device_list)
+		return;
+
+	dlist_for_each_data(device_list, dev, struct ib_cm_device)
+		close(dev->fd);
+	
+	dlist_destroy(device_list);
+}
+
+struct ib_cm_device* ib_cm_get_device(struct ibv_context *device_context)
+{
+	struct ib_cm_device *dev;
+	uint64_t guid;
+
+	guid = ibv_get_device_guid(device_context->device);
+	dlist_for_each_data(device_list, dev, struct ib_cm_device)
+		if (dev->guid == guid)
+			return dev;
+
+	return NULL;
 }
 
 static void cm_param_path_get(struct cm_abi_path_rec *abi,
@@ -146,7 +300,8 @@ static void ib_cm_free_id(struct cm_id_p
 	free(cm_id_priv);
 }
 
-static struct cm_id_private *ib_cm_alloc_id(void *context)
+static struct cm_id_private *ib_cm_alloc_id(struct ibv_context *device_context,
+					    void *context)
 {
 	struct cm_id_private *cm_id_priv;
 
@@ -155,18 +310,24 @@ static struct cm_id_private *ib_cm_alloc
 		return NULL;
 
 	memset(cm_id_priv, 0, sizeof *cm_id_priv);
+	cm_id_priv->id.device_context = device_context;
 	cm_id_priv->id.context = context;
 	pthread_mutex_init(&cm_id_priv->mut, NULL);
 	if (pthread_cond_init(&cm_id_priv->cond, NULL))
 		goto err;
 
+	cm_id_priv->id.device = ib_cm_get_device(device_context);
+	if (!cm_id_priv->id.device)
+		goto err;
+
 	return cm_id_priv;
 
 err:	ib_cm_free_id(cm_id_priv);
 	return NULL;
 }
 
-int ib_cm_create_id(struct ib_cm_id **cm_id, void *context)
+int ib_cm_create_id(struct ibv_context *device_context,
+		    struct ib_cm_id **cm_id, void *context)
 {
 	struct cm_abi_create_id_resp *resp;
 	struct cm_abi_create_id *cmd;
@@ -175,14 +336,14 @@ int ib_cm_create_id(struct ib_cm_id **cm
 	int result;
 	int size;
 
-	cm_id_priv = ib_cm_alloc_id(context);
+	cm_id_priv = ib_cm_alloc_id(device_context, context);
 	if (!cm_id_priv)
 		return -ENOMEM;
 
 	CM_CREATE_MSG_CMD_RESP(msg, cmd, resp, IB_USER_CM_CMD_CREATE_ID, size);
 	cmd->uid = (uintptr_t) cm_id_priv;
 
-	result = write(fd, msg, size);
+	result = write(cm_id_priv->id.device->fd, msg, size);
 	if (result != size)
 		goto err;
 
@@ -206,7 +367,7 @@ int ib_cm_destroy_id(struct ib_cm_id *cm
 	CM_CREATE_MSG_CMD_RESP(msg, cmd, resp, IB_USER_CM_CMD_DESTROY_ID, size);
 	cmd->id = cm_id->handle;
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -235,7 +396,7 @@ int ib_cm_attr_id(struct ib_cm_id *cm_id
 	CM_CREATE_MSG_CMD_RESP(msg, cmd, resp, IB_USER_CM_CMD_ATTR_ID, size);
 	cmd->id = cm_id->handle;
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -317,7 +478,7 @@ int ib_cm_init_qp_attr(struct ib_cm_id *
 	cmd->id = cm_id->handle;
 	cmd->qp_state = qp_attr->qp_state;
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -341,7 +502,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id,
 	cmd->service_id   = service_id;
 	cmd->service_mask = service_mask;
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -400,7 +561,7 @@ int ib_cm_send_req(struct ib_cm_id *cm_i
 		cmd->len  = param->private_data_len;
 	}
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -435,7 +596,7 @@ int ib_cm_send_rep(struct ib_cm_id *cm_i
 		cmd->len  = param->private_data_len;
 	}
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -460,7 +621,7 @@ static inline int cm_send_private_data(s
 		cmd->len  = private_data_len;
 	}
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -501,7 +662,7 @@ int ib_cm_establish(struct ib_cm_id *cm_
 	CM_CREATE_MSG_CMD(msg, cmd, IB_USER_CM_CMD_ESTABLISH, size);
 	cmd->id = cm_id->handle;
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -535,7 +696,7 @@ static inline int cm_send_status(struct 
 		cmd->info_len = info_length;
 	}
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -585,7 +746,7 @@ int ib_cm_send_mra(struct ib_cm_id *cm_i
 		cmd->len  = private_data_len;
 	}
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -620,7 +781,7 @@ int ib_cm_send_lap(struct ib_cm_id *cm_i
 		cmd->len  = private_data_len;
 	}
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -660,7 +821,7 @@ int ib_cm_send_sidr_req(struct ib_cm_id 
 		cmd->len  = param->private_data_len;
 	}
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -694,7 +855,7 @@ int ib_cm_send_sidr_rep(struct ib_cm_id 
 		cmd->info_len = param->info_length;
 	}
 
-	result = write(fd, msg, size);
+	result = write(cm_id->device->fd, msg, size);
 	if (result != size)
 		return (result > 0) ? -ENODATA : result;
 
@@ -750,6 +911,7 @@ static void cm_event_req_get(struct ib_c
 	ureq->retry_count                = kreq->retry_count;
 	ureq->rnr_retry_count            = kreq->rnr_retry_count;
 	ureq->srq                        = kreq->srq;
+	ureq->port			 = kreq->port;
 
 	cm_event_path_get(ureq->primary_path, &kreq->primary_path);
 	cm_event_path_get(ureq->alternate_path, &kreq->alternate_path);
@@ -779,7 +941,7 @@ static void cm_event_sidr_rep_get(struct
 	urep->qpn    = krep->qpn;
 };
 
-int ib_cm_get_event(struct ib_cm_event **event)
+int ib_cm_get_event(struct ib_cm_device *device, struct ib_cm_event **event)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_abi_cmd_hdr *hdr;
@@ -832,7 +994,7 @@ int ib_cm_get_event(struct ib_cm_event *
 	cmd->data = (uintptr_t) data;
 	cmd->info = (uintptr_t) info;
 
-	result = write(fd, msg, size);
+	result = write(device->fd, msg, size);
 	if (result != size) {
 		result = (result > 0) ? -ENODATA : result;
 		goto done;
@@ -868,7 +1030,8 @@ int ib_cm_get_event(struct ib_cm_event *
 	switch (evt->event) {
 	case IB_CM_REQ_RECEIVED:
 		evt->param.req_rcvd.listen_id = evt->cm_id;
-		cm_id_priv = ib_cm_alloc_id(evt->cm_id->context);
+		cm_id_priv = ib_cm_alloc_id(evt->cm_id->device_context,
+					    evt->cm_id->context);
 		if (!cm_id_priv) {
 			result = -ENOMEM;
 			goto done;
@@ -905,7 +1068,8 @@ int ib_cm_get_event(struct ib_cm_event *
 		break;
 	case IB_CM_SIDR_REQ_RECEIVED:
 		evt->param.sidr_req_rcvd.listen_id = evt->cm_id;
-		cm_id_priv = ib_cm_alloc_id(evt->cm_id->context);
+		cm_id_priv = ib_cm_alloc_id(evt->cm_id->device_context,
+					    evt->cm_id->context);
 		if (!cm_id_priv) {
 			result = -ENOMEM;
 			goto done;
@@ -913,6 +1077,7 @@ int ib_cm_get_event(struct ib_cm_event *
 		cm_id_priv->id.handle = resp->id;
 		evt->cm_id = &cm_id_priv->id;
 		evt->param.sidr_req_rcvd.pkey = resp->u.sidr_req_resp.pkey;
+		evt->param.sidr_req_rcvd.port = resp->u.sidr_req_resp.port;
 		break;
 	case IB_CM_SIDR_REP_RECEIVED:
 		cm_event_sidr_rep_get(&evt->param.sidr_rep_rcvd,
@@ -998,32 +1163,3 @@ int ib_cm_ack_event(struct ib_cm_event *
 	free(event);
 	return 0;
 }
-
-int ib_cm_get_fd(void)
-{
-	return fd;
-}
-
-int ib_cm_get_event_timed(int timeout_ms, struct ib_cm_event **event)
-{
-	struct pollfd ufds;
-	int result;
-
-	if (!event)
-		return -EINVAL;
-
-	ufds.fd      = ib_cm_get_fd();
-	ufds.events  = POLLIN;
-	ufds.revents = 0;
-
-	*event = NULL;
-
-	result = poll(&ufds, 1, timeout_ms);
-	if (!result)
-		return -ETIMEDOUT;
-
-	if (result < 0)
-		return result;
-
-	return ib_cm_get_event(event);
-}
Index: userspace/libibcm/examples/cmpost.c
===================================================================
--- userspace/libibcm/examples/cmpost.c	(revision 3433)
+++ userspace/libibcm/examples/cmpost.c	(working copy)
@@ -307,7 +307,7 @@ static int init_node(struct cmtest_node 
 	int cqe, ret;
 
 	if (!is_server) {
-		ret = ib_cm_create_id(&node->cm_id, node);
+		ret = ib_cm_create_id(test.verbs, &node->cm_id, node);
 		if (ret) {
 			printf("failed to create cm_id: %d\n", ret);
 			return ret;
@@ -526,7 +526,7 @@ static void connect_events(void)
 	int err = 0;
 
 	while (test.connects_left && !err) {
-		err = ib_cm_get_event(&event);
+		err = ib_cm_get_event(ib_cm_get_device(test.verbs), &event);
 		if (!err) {
 			cm_handler(event->cm_id, event);
 			ib_cm_ack_event(event);
@@ -540,7 +540,7 @@ static void disconnect_events(void)
 	int err = 0;
 
 	while (test.disconnects_left && !err) {
-		err = ib_cm_get_event(&event);
+		err = ib_cm_get_event(ib_cm_get_device(test.verbs), &event);
 		if (!err) {
 			cm_handler(event->cm_id, event);
 			ib_cm_ack_event(event);
@@ -554,7 +554,7 @@ static void run_server(void)
 	int i, ret;
 
 	printf("starting server\n");
-	if (ib_cm_create_id(&listen_id, &test)) {
+	if (ib_cm_create_id(test.verbs, &listen_id, &test)) {
 		printf("listen request failed\n");
 		return;
 	}
Index: linux-kernel/infiniband/include/rdma/ib_cm.h
===================================================================
--- linux-kernel/infiniband/include/rdma/ib_cm.h	(revision 3433)
+++ linux-kernel/infiniband/include/rdma/ib_cm.h	(working copy)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -109,7 +109,6 @@ struct ib_cm_id;
 
 struct ib_cm_req_event_param {
 	struct ib_cm_id		*listen_id;
-	struct ib_device	*device;
 	u8			port;
 
 	struct ib_sa_path_rec	*primary_path;
@@ -220,7 +219,6 @@ struct ib_cm_apr_event_param {
 
 struct ib_cm_sidr_req_event_param {
 	struct ib_cm_id		*listen_id;
-	struct ib_device	*device;
 	u8			port;
 	u16			pkey;
 };
@@ -284,6 +282,7 @@ typedef int (*ib_cm_handler)(struct ib_c
 struct ib_cm_id {
 	ib_cm_handler		cm_handler;
 	void			*context;
+	struct ib_device	*device;
 	__be64			service_id;
 	__be64			service_mask;
 	enum ib_cm_state	state;		/* internal CM/debug use */
@@ -295,6 +294,8 @@ struct ib_cm_id {
 
 /**
  * ib_create_cm_id - Allocate a communication identifier.
+ * @device: Device associated with the cm_id.  All related communication will
+ * be associated with the specified device.
  * @cm_handler: Callback invoked to notify the user of CM events.
  * @context: User specified context associated with the communication
  *   identifier.
@@ -302,7 +303,8 @@ struct ib_cm_id {
  * Communication identifiers are used to track connection states, service
  * ID resolution requests, and listen requests.
  */
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+				 ib_cm_handler cm_handler,
 				 void *context);
 
 /**
Index: linux-kernel/infiniband/include/rdma/ib_user_cm.h
===================================================================
--- linux-kernel/infiniband/include/rdma/ib_user_cm.h	(revision 3433)
+++ linux-kernel/infiniband/include/rdma/ib_user_cm.h	(working copy)
@@ -38,7 +38,7 @@
 
 #include <linux/types.h>
 
-#define IB_USER_CM_ABI_VERSION 2
+#define IB_USER_CM_ABI_VERSION 3
 
 enum {
 	IB_USER_CM_CMD_CREATE_ID,
@@ -299,8 +299,6 @@ struct ib_ucm_event_get {
 };
 
 struct ib_ucm_req_event_resp {
-	/* device */
-	/* port */
 	struct ib_ucm_path_rec primary_path;
 	struct ib_ucm_path_rec alternate_path;
 	__be64                 remote_ca_guid;
@@ -316,6 +314,7 @@ struct ib_ucm_req_event_resp {
 	__u8  retry_count;
 	__u8  rnr_retry_count;
 	__u8  srq;
+	__u8  port;
 };
 
 struct ib_ucm_rep_event_resp {
@@ -353,10 +352,9 @@ struct ib_ucm_apr_event_resp {
 };
 
 struct ib_ucm_sidr_req_event_resp {
-	/* device */
-	/* port */
 	__u16 pkey;
-	__u8  reserved[2];
+	__u8  port;
+	__u8  reserved;
 };
 
 struct ib_ucm_sidr_rep_event_resp {
Index: linux-kernel/infiniband/core/cm.c
===================================================================
--- linux-kernel/infiniband/core/cm.c	(revision 3433)
+++ linux-kernel/infiniband/core/cm.c	(working copy)
@@ -366,9 +366,15 @@ static struct cm_id_private * cm_insert_
 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
 					  service_node);
 		if ((cur_cm_id_priv->id.service_mask & service_id) ==
-		    (service_mask & cur_cm_id_priv->id.service_id))
-			return cm_id_priv;
-		if (service_id < cur_cm_id_priv->id.service_id)
+		    (service_mask & cur_cm_id_priv->id.service_id) &&
+		    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
+			return cur_cm_id_priv;
+
+		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
+			link = &(*link)->rb_left;
+		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
+			link = &(*link)->rb_right;
+		else if (service_id < cur_cm_id_priv->id.service_id)
 			link = &(*link)->rb_left;
 		else
 			link = &(*link)->rb_right;
@@ -378,7 +384,8 @@ static struct cm_id_private * cm_insert_
 	return NULL;
 }
 
-static struct cm_id_private * cm_find_listen(__be64 service_id)
+static struct cm_id_private * cm_find_listen(struct ib_device *device,
+					     __be64 service_id)
 {
 	struct rb_node *node = cm.listen_service_table.rb_node;
 	struct cm_id_private *cm_id_priv;
@@ -386,9 +393,15 @@ static struct cm_id_private * cm_find_li
 	while (node) {
 		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
 		if ((cm_id_priv->id.service_mask & service_id) ==
-		    (cm_id_priv->id.service_mask & cm_id_priv->id.service_id))
+		     cm_id_priv->id.service_id &&
+		    (cm_id_priv->id.device == device))
 			return cm_id_priv;
-		if (service_id < cm_id_priv->id.service_id)
+
+		if (device < cm_id_priv->id.device)
+			node = node->rb_left;
+		else if (device > cm_id_priv->id.device)
+			node = node->rb_right;
+		else if (service_id < cm_id_priv->id.service_id)
 			node = node->rb_left;
 		else
 			node = node->rb_right;
@@ -523,7 +536,8 @@ static void cm_reject_sidr_req(struct cm
 	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
 }
 
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+				 ib_cm_handler cm_handler,
 				 void *context)
 {
 	struct cm_id_private *cm_id_priv;
@@ -535,6 +549,7 @@ struct ib_cm_id *ib_create_cm_id(ib_cm_h
 
 	memset(cm_id_priv, 0, sizeof *cm_id_priv);
 	cm_id_priv->id.state = IB_CM_IDLE;
+	cm_id_priv->id.device = device;
 	cm_id_priv->id.cm_handler = cm_handler;
 	cm_id_priv->id.context = context;
 	cm_id_priv->id.remote_cm_qpn = 1;
@@ -1047,7 +1062,6 @@ static void cm_format_req_event(struct c
 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 	param = &work->cm_event.param.req_rcvd;
 	param->listen_id = listen_id;
-	param->device = cm_id_priv->av.port->mad_agent->device;
 	param->port = cm_id_priv->av.port->port_num;
 	param->primary_path = &work->path[0];
 	if (req_msg->alt_local_lid)
@@ -1226,7 +1240,8 @@ static struct cm_id_private * cm_match_r
 	}
 
 	/* Find matching listen request. */
-	listen_cm_id_priv = cm_find_listen(req_msg->service_id);
+	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+					   req_msg->service_id);
 	if (!listen_cm_id_priv) {
 		spin_unlock_irqrestore(&cm.lock, flags);
 		cm_issue_rej(work->port, work->mad_recv_wc,
@@ -1254,7 +1269,7 @@ static int cm_req_handler(struct cm_work
 
 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 
-	cm_id = ib_create_cm_id(NULL, NULL);
+	cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
 	if (IS_ERR(cm_id))
 		return PTR_ERR(cm_id);
 
@@ -2629,7 +2644,6 @@ static void cm_format_sidr_req_event(str
 	param = &work->cm_event.param.sidr_req_rcvd;
 	param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
 	param->listen_id = listen_id;
-	param->device = work->port->mad_agent->device;
 	param->port = work->port->port_num;
 	work->cm_event.private_data = &sidr_req_msg->private_data;
 }
@@ -2642,7 +2656,7 @@ static int cm_sidr_req_handler(struct cm
 	struct ib_wc *wc;
 	unsigned long flags;
 
-	cm_id = ib_create_cm_id(NULL, NULL);
+	cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
 	if (IS_ERR(cm_id))
 		return PTR_ERR(cm_id);
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -2666,7 +2680,8 @@ static int cm_sidr_req_handler(struct cm
 		spin_unlock_irqrestore(&cm.lock, flags);
 		goto out; /* Duplicate message. */
 	}
-	cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id);
+	cur_cm_id_priv = cm_find_listen(cm_id->device,
+					sidr_req_msg->service_id);
 	if (!cur_cm_id_priv) {
 		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
 		spin_unlock_irqrestore(&cm.lock, flags);
Index: linux-kernel/infiniband/core/ucm.c
===================================================================
--- linux-kernel/infiniband/core/ucm.c	(revision 3433)
+++ linux-kernel/infiniband/core/ucm.c	(working copy)
@@ -52,12 +52,20 @@ MODULE_AUTHOR("Libor Michalek");
 MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
 MODULE_LICENSE("Dual BSD/GPL");
 
+struct ib_ucm_device {
+	int			devnum;
+	struct cdev		dev;
+	struct class_device	class_dev;
+	struct ib_device	*ib_dev;
+};
+
 struct ib_ucm_file {
 	struct semaphore mutex;
 	struct file *filp;
+	struct ib_ucm_device *device;
 
-	struct list_head  ctxs;   /* list of active connections */
-	struct list_head  events; /* list of pending events */
+	struct list_head  ctxs;
+	struct list_head  events;
 	wait_queue_head_t poll_wait;
 };
 
@@ -90,14 +98,24 @@ struct ib_ucm_event {
 
 enum {
 	IB_UCM_MAJOR = 231,
-	IB_UCM_MINOR = 255
+	IB_UCM_BASE_MINOR = 224,
+	IB_UCM_MAX_DEVICES = 32
 };
 
-#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR)
+#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
 
-static struct semaphore ctx_id_mutex;
-static struct idr       ctx_id_table;
+static void ib_ucm_add_one(struct ib_device *device);
+static void ib_ucm_remove_one(struct ib_device *device);
 
+static struct ib_client ucm_client = {
+	.name   = "ucm",
+	.add    = ib_ucm_add_one,
+	.remove = ib_ucm_remove_one
+};
+
+DECLARE_MUTEX(ctx_id_mutex);
+DEFINE_IDR(ctx_id_table);
+static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
 
 static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
 {
@@ -184,10 +202,7 @@ error:
 	kfree(ctx);
 	return NULL;
 }
-/*
- * Event portion of the API, handle CM events
- * and allow event polling.
- */
+
 static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
 				  struct ib_sa_path_rec	 *kpath)
 {
@@ -234,6 +249,7 @@ static void ib_ucm_event_req_get(struct 
 	ureq->retry_count                = kreq->retry_count;
 	ureq->rnr_retry_count            = kreq->rnr_retry_count;
 	ureq->srq                        = kreq->srq;
+	ureq->port			 = kreq->port;
 
 	ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path);
 	ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path);
@@ -320,6 +336,8 @@ static int ib_ucm_event_process(struct i
 	case IB_CM_SIDR_REQ_RECEIVED:
 		uvt->resp.u.sidr_req_resp.pkey = 
 					evt->param.sidr_req_rcvd.pkey;
+		uvt->resp.u.sidr_req_resp.port = 
+					evt->param.sidr_req_rcvd.port;
 		uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
 		break;
 	case IB_CM_SIDR_REP_RECEIVED:
@@ -412,9 +430,7 @@ static ssize_t ib_ucm_event(struct ib_uc
 
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
-	/*
-	 * wait
-	 */
+
 	down(&file->mutex);
 	while (list_empty(&file->events)) {
 
@@ -496,7 +512,6 @@ done:
 	return result;
 }
 
-
 static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
 				const char __user *inbuf,
 				int in_len, int out_len)
@@ -519,29 +534,27 @@ static ssize_t ib_ucm_create_id(struct i
 		return -ENOMEM;
 
 	ctx->uid = cmd.uid;
-	ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx);
+	ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
+				     ib_ucm_event_handler, ctx);
 	if (IS_ERR(ctx->cm_id)) {
 		result = PTR_ERR(ctx->cm_id);
-		goto err;
+		goto err1;
 	}
 
 	resp.id = ctx->id;
 	if (copy_to_user((void __user *)(unsigned long)cmd.response,
 			 &resp, sizeof(resp))) {
 		result = -EFAULT;
-		goto err;
+		goto err2;
 	}
-
 	return 0;
 
-err:
+err2:
+	ib_destroy_cm_id(ctx->cm_id);
+err1:
 	down(&ctx_id_mutex);
 	idr_remove(&ctx_id_table, ctx->id);
 	up(&ctx_id_mutex);
-
-	if (!IS_ERR(ctx->cm_id))
-		ib_destroy_cm_id(ctx->cm_id);
-
 	kfree(ctx);
 	return result;
 }
@@ -1253,6 +1266,7 @@ static int ib_ucm_open(struct inode *ino
 
 	filp->private_data = file;
 	file->filp = filp;
+	file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev);
 
 	return 0;
 }
@@ -1283,7 +1297,17 @@ static int ib_ucm_close(struct inode *in
 	return 0;
 }
 
-static struct file_operations ib_ucm_fops = {
+static void ib_ucm_release_class_dev(struct class_device *class_dev)
+{
+	struct ib_ucm_device *dev;
+
+	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+	cdev_del(&dev->dev);
+	clear_bit(dev->devnum, dev_map);
+	kfree(dev);
+}
+
+static struct file_operations ucm_fops = {
 	.owner 	 = THIS_MODULE,
 	.open 	 = ib_ucm_open,
 	.release = ib_ucm_close,
@@ -1291,55 +1315,141 @@ static struct file_operations ib_ucm_fop
 	.poll    = ib_ucm_poll,
 };
 
+static struct class ucm_class = {
+	.name    = "infiniband_cm",
+	.release = ib_ucm_release_class_dev
+};
 
-static struct class *ib_ucm_class;
-static struct cdev	  ib_ucm_cdev;
+static ssize_t show_dev(struct class_device *class_dev, char *buf)
+{
+	struct ib_ucm_device *dev;
+	
+	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+	return print_dev_t(buf, dev->dev.dev);
+}
+static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
 
-static int __init ib_ucm_init(void)
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
-	int result;
+	struct ib_ucm_device *dev;
+	
+	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+	return sprintf(buf, "%s\n", dev->ib_dev->name);
+}
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
 
-	result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm");
-	if (result) {
-		printk(KERN_ERR "ucm: Error <%d> registering dev\n", result);
-		goto err_chr;
-	}
+static void ib_ucm_add_one(struct ib_device *device)
+{
+	struct ib_ucm_device *ucm_dev;
 
-	cdev_init(&ib_ucm_cdev, &ib_ucm_fops);
+	if (!device->alloc_ucontext)
+		return;
 
-	result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1);
-	if (result) {
-		printk(KERN_ERR "ucm: Error <%d> adding cdev\n", result);
+	ucm_dev = kmalloc(sizeof *ucm_dev, GFP_KERNEL);
+	if (!ucm_dev)
+		return;
+
+	memset(ucm_dev, 0, sizeof *ucm_dev);
+	ucm_dev->ib_dev = device;
+
+	ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+	if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
+		goto err;
+
+	set_bit(ucm_dev->devnum, dev_map);
+
+	cdev_init(&ucm_dev->dev, &ucm_fops);
+	ucm_dev->dev.owner = THIS_MODULE;
+	kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum);
+	if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+		goto err;
+
+	ucm_dev->class_dev.class = &ucm_class;
+	ucm_dev->class_dev.dev = device->dma_device;
+	snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d",
+		 ucm_dev->devnum);
+	if (class_device_register(&ucm_dev->class_dev))
 		goto err_cdev;
-	}
 
-	ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm");
-	if (IS_ERR(ib_ucm_class)) {
-		result = PTR_ERR(ib_ucm_class);
-		printk(KERN_ERR "Error <%d> creating class\n", result);
+	if (class_device_create_file(&ucm_dev->class_dev,
+				     &class_device_attr_dev))
 		goto err_class;
+	if (class_device_create_file(&ucm_dev->class_dev,
+				     &class_device_attr_ibdev))
+		goto err_class;
+
+	ib_set_client_data(device, &ucm_client, ucm_dev);
+	return;
+
+err_class:
+	class_device_unregister(&ucm_dev->class_dev);
+err_cdev:
+	cdev_del(&ucm_dev->dev);
+	clear_bit(ucm_dev->devnum, dev_map);
+err:
+	kfree(ucm_dev);
+	return;
+}
+
+static void ib_ucm_remove_one(struct ib_device *device)
+{
+	struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client);
+
+	if (!ucm_dev)
+		return;
+
+	class_device_unregister(&ucm_dev->class_dev);
+}
+
+static ssize_t show_abi_version(struct class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static int __init ib_ucm_init(void)
+{
+	int ret;
+
+	ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
+				     "infiniband_cm");
+	if (ret) {
+		printk(KERN_ERR "ucm: couldn't register device number\n");
+		goto err;
 	}
 
-	class_device_create(ib_ucm_class, IB_UCM_DEV, NULL, "ucm");
+	ret = class_register(&ucm_class);
+	if (ret) {
+		printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n");
+		goto err_chrdev;
+	}
 
-	idr_init(&ctx_id_table);
-	init_MUTEX(&ctx_id_mutex);
+	ret = class_create_file(&ucm_class, &class_attr_abi_version);
+	if (ret) {
+		printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
+		goto err_class;
+	}
 
+	ret = ib_register_client(&ucm_client);
+	if (ret) {
+		printk(KERN_ERR "ucm: couldn't register client\n");
+		goto err_class;
+	}
 	return 0;
+
 err_class:
-	cdev_del(&ib_ucm_cdev);
-err_cdev:
-	unregister_chrdev_region(IB_UCM_DEV, 1);
-err_chr:
-	return result;
+	class_unregister(&ucm_class);
+err_chrdev:
+	unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+err:
+	return ret;
 }
 
 static void __exit ib_ucm_cleanup(void)
 {
-	class_device_destroy(ib_ucm_class, IB_UCM_DEV);
-	class_destroy(ib_ucm_class);
-	cdev_del(&ib_ucm_cdev);
-	unregister_chrdev_region(IB_UCM_DEV, 1);
+	ib_unregister_client(&ucm_client);
+	class_unregister(&ucm_class);
+	unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
 }
 
 module_init(ib_ucm_init);






More information about the general mailing list