[openib-general] [PATCH] Initial checkin of userspace MAD access

Roland Dreier roland at topspin.com
Tue Nov 2 22:27:48 PST 2004


I've just checked in an initial version of userspace MAD access
(including documentation in docs/user_mad.txt).

Unfortunately this is not quite ready for use underneath OpenSM: it is
not yet possible to register an agent for the SM classes, because they
are currently grabbed by the kernel SMA first.
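
As a rough illustration of how the interface is meant to be used (the
details are in docs/user_mad.txt at the end of the patch), here is a
sketch of registering and unregistering an agent from userspace.  The
device path assumes the udev naming from the docs, the management class
and method mask values are purely illustrative, and the <ib_user_mad.h>
include path will depend on how the headers end up installed:

	/* Hypothetical example, not part of the patch. */
	#include <stdio.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>

	#include <ib_user_mad.h>

	int main(void)
	{
		struct ib_user_mad_reg_req req;
		int fd;

		/* port 1 of mthca0, per the udev rule in docs/user_mad.txt */
		fd = open("/dev/infiniband/mthca0/umad1", O_RDWR);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		memset(&req, 0, sizeof req);
		req.qpn                = 1;		/* GSI */
		req.mgmt_class         = 0x04;		/* e.g. PerfMgt (illustrative) */
		req.mgmt_class_version = 1;
		req.method_mask[0]     = 1 << 1;	/* one bit per method (illustrative) */

		if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT, &req)) {
			perror("IB_USER_MAD_REGISTER_AGENT");
			return 1;
		}

		printf("registered agent id %u\n", req.id);

		/* ...send/receive MADs as described in docs/user_mad.txt... */

		ioctl(fd, IB_USER_MAD_UNREGISTER_AGENT, &req.id);
		close(fd);
		return 0;
	}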

All criticisms and comments greatly appreciated...

Thanks,
  Roland

Index: infiniband/include/ib_user_mad.h
===================================================================
--- infiniband/include/ib_user_mad.h	(revision 0)
+++ infiniband/include/ib_user_mad.h	(revision 0)
@@ -0,0 +1,97 @@
+/*
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available at
+ * <http://www.fsf.org/copyleft/gpl.html>, or the OpenIB.org BSD
+ * license, available in the LICENSE.TXT file accompanying this
+ * software.  These details are also available at
+ * <http://openib.org/license.html>.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#ifndef IB_USER_MAD_H
+#define IB_USER_MAD_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ */
+
+/**
+ * ib_user_mad - MAD packet
+ * @data - Contents of MAD
+ * @id - ID of agent MAD received with/to be sent with
+ * @qpn - Remote QP number received from/to be sent to
+ * @qkey - Remote Q_Key to be sent with (unset on receive)
+ * @lid - Remote lid received from/to be sent to
+ * @sl - Service level received with/to be sent with
+ * @path_bits - Local path bits received with/to be sent with
+ * @grh_present - If set, GRH was received/should be sent
+ * @gid_index - Local GID index to send with (unset on receive)
+ * @hop_limit - Hop limit in GRH
+ * @traffic_class - Traffic class in GRH
+ * @gid - Remote GID in GRH
+ * @flow_label - Flow label in GRH
+ *
+ * All multi-byte quantities are stored in network (big endian) byte order.
+ */
+struct ib_user_mad {
+	__u8	data[256];
+	__u32	id;
+	__u32	qpn;
+	__u32   qkey;
+	__u16	lid;
+	__u8	sl;
+	__u8	path_bits;
+	__u8	grh_present;
+	__u8	gid_index;
+	__u8	hop_limit;
+	__u8	traffic_class;
+	__u8	gid[16];
+	__u32	flow_label;
+};
+
+/**
+ * ib_user_mad_reg_req - MAD registration request
+ * @id - Set by the kernel; used to identify agent in future requests.
+ * @qpn - Queue pair number; must be 0 or 1.
+ * @method_mask - The caller will receive unsolicited MADs for any method
+ *   whose corresponding bit in @method_mask is set.
+ * @mgmt_class - Indicates which management class of MADs should be received
+ *   by the caller.  This field is only required if the user wishes to
+ *   receive unsolicited MADs, otherwise it should be 0.
+ * @mgmt_class_version - Indicates which version of MADs for the given
+ *   management class to receive.
+ */
+struct ib_user_mad_reg_req {
+	__u32	id;
+	__u32	method_mask[4];
+	__u8	qpn;
+	__u8	mgmt_class;
+	__u8	mgmt_class_version;
+};
+
+#define IB_IOCTL_MAGIC		0x1b
+
+#define IB_USER_MAD_REGISTER_AGENT	_IOWR(IB_IOCTL_MAGIC, 0, \
+					      struct ib_user_mad_reg_req)
+
+#define IB_USER_MAD_UNREGISTER_AGENT	_IOW(IB_IOCTL_MAGIC, 1, __u32)
+
+#endif /* IB_USER_MAD_H */
Index: infiniband/core/Makefile
===================================================================
--- infiniband/core/Makefile	(revision 1086)
+++ infiniband/core/Makefile	(working copy)
@@ -10,7 +10,8 @@
 obj-$(CONFIG_INFINIBAND) += \
     ib_core.o \
     ib_mad.o \
-    ib_sa.o
+    ib_sa.o \
+    ib_umad.o
 
 obj-$(CONFIG_INFINIBAND_CM) += \
     ib_cm.o
@@ -36,6 +37,8 @@
 
 ib_sa-objs := sa_query.o
 
+ib_umad-objs := user_mad.o
+
 ib_cm-objs := \
     cm_main.o \
     cm_api.o \
Index: infiniband/core/user_mad.c
===================================================================
--- infiniband/core/user_mad.c	(revision 0)
+++ infiniband/core/user_mad.c	(revision 0)
@@ -0,0 +1,639 @@
+/*
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available at
+ * <http://www.fsf.org/copyleft/gpl.html>, or the OpenIB.org BSD
+ * license, available in the LICENSE.TXT file accompanying this
+ * software.  These details are also available at
+ * <http://openib.org/license.html>.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+
+#include <ib_mad.h>
+#include <ib_user_mad.h>
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+enum {
+	IB_UMAD_MAX_PORTS  = 256,
+	IB_UMAD_MAX_AGENTS = 32
+};
+
+struct ib_umad_port {
+	int                  devnum;
+	struct cdev          dev;
+	struct class_device *class_dev;
+	struct ib_device    *ib_dev;
+	u8                   port_num;
+};
+
+struct ib_umad_device {
+	int                  start_port, end_port;
+	struct ib_umad_port  port[0];
+};
+
+struct ib_umad_file {
+	struct ib_umad_port *port;
+	struct semaphore     mutex;
+	struct list_head     recv_list;
+	wait_queue_head_t    recv_wait;
+	struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
+	struct ib_mr        *mr[IB_UMAD_MAX_AGENTS];
+};
+
+struct ib_umad_packet {
+	struct ib_user_mad mad;
+	struct ib_ah      *ah;
+	struct list_head   list;
+	DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+static dev_t base_dev;
+static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
+
+static struct class_simple *umad_class;
+
+static void ib_umad_add_one(struct ib_device *device);
+static void ib_umad_remove_one(struct ib_device *device);
+
+static void send_handler(struct ib_mad_agent *agent,
+			 struct ib_mad_send_wc *mad_send_wc)
+{
+	struct ib_umad_packet *packet =
+		(void *) (unsigned long) mad_send_wc->wr_id;
+
+	pci_unmap_single(agent->device->dma_device,
+			 pci_unmap_addr(packet, mapping),
+			 sizeof packet->mad.data,
+			 PCI_DMA_TODEVICE);
+	ib_destroy_ah(packet->ah);
+	kfree(packet);
+}
+
+static void recv_handler(struct ib_mad_agent *agent,
+			 struct ib_mad_recv_wc *mad_recv_wc)
+{
+	struct ib_umad_file *file = agent->context;
+	struct ib_umad_packet *packet;
+
+	if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
+		goto out;
+
+	packet = kmalloc(sizeof *packet, GFP_KERNEL);
+	if (!packet)
+		goto out;
+
+	memset(packet, 0, sizeof *packet);
+
+	memcpy(packet->mad.data, mad_recv_wc->recv_buf->mad, sizeof packet->mad.data);
+	packet->mad.qpn 	  = cpu_to_be32(mad_recv_wc->wc->src_qp);
+	packet->mad.lid 	  = cpu_to_be16(mad_recv_wc->wc->slid);
+	packet->mad.sl  	  = mad_recv_wc->wc->sl;
+	packet->mad.path_bits 	  = mad_recv_wc->wc->dlid_path_bits;
+	packet->mad.grh_present   = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
+	if (packet->mad.grh_present) {
+		/* XXX parse GRH */
+		packet->mad.gid_index 	  = 0;
+		packet->mad.hop_limit 	  = 0;
+		packet->mad.traffic_class = 0;
+		memset(packet->mad.gid, 0, 16);
+		packet->mad.flow_label 	  = 0;
+	}
+
+	down(&file->mutex);
+	for (packet->mad.id = 0;
+	     packet->mad.id < IB_UMAD_MAX_AGENTS;
+	     packet->mad.id++)
+		if (agent == file->agent[packet->mad.id]) {
+			list_add_tail(&packet->list, &file->recv_list);
+			wake_up_interruptible(&file->recv_wait);
+			goto agent;
+		}
+
+	kfree(packet);
+
+agent:
+	up(&file->mutex);
+
+out:
+	ib_free_recv_mad(mad_recv_wc);
+}
+
+static ssize_t ib_umad_read(struct file *filp, char __user *buf,
+			    size_t count, loff_t *pos)
+{
+	struct ib_umad_file *file = filp->private_data;
+	struct ib_umad_packet *packet;
+	ssize_t ret;
+
+	if (count < sizeof (struct ib_user_mad))
+		return -EINVAL;
+
+	if (down_interruptible(&file->mutex))
+		return -ERESTARTSYS;
+
+	while (list_empty(&file->recv_list)) {
+		up(&file->mutex);
+
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		if (wait_event_interruptible(file->recv_wait,
+					     !list_empty(&file->recv_list)))
+			return -ERESTARTSYS;
+
+		if (down_interruptible(&file->mutex))
+			return -ERESTARTSYS;
+	}
+
+	packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
+	list_del(&packet->list);
+
+	up(&file->mutex);
+
+	if (copy_to_user(buf, &packet->mad, sizeof packet->mad))
+		ret = -EFAULT;
+	else
+		ret = sizeof packet->mad;
+
+	kfree(packet);
+	return ret;
+}
+
+static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+			     size_t count, loff_t *pos)
+{
+	struct ib_umad_file *file = filp->private_data;
+	struct ib_umad_packet *packet;
+	struct ib_mad_agent *agent;
+	struct ib_ah_attr ah_attr;
+	struct ib_sge      gather_list;
+	struct ib_send_wr *bad_wr, wr = {
+		.opcode      = IB_WR_SEND,
+		.sg_list     = &gather_list,
+		.num_sge     = 1,
+		.send_flags  = IB_SEND_SIGNALED,
+	};
+	int ret;
+
+	if (count < sizeof (struct ib_user_mad))
+		return -EINVAL;
+
+	packet = kmalloc(sizeof *packet, GFP_KERNEL);
+	if (!packet)
+		return -ENOMEM;
+
+	if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) {
+		kfree(packet);
+		return -EFAULT;
+	}
+
+	if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (down_interruptible(&file->mutex)) {
+		ret = -ERESTARTSYS;
+		goto err;
+	}
+
+	agent = file->agent[packet->mad.id];
+	if (!agent) {
+		ret = -EINVAL;
+		goto err_up;
+	}
+
+	((struct ib_mad_hdr *) packet->mad.data)->tid =
+		cpu_to_be64(((u64) agent->hi_tid) << 32 |
+			    (be64_to_cpu(((struct ib_mad_hdr *) packet->mad.data)->tid) &
+			     0xffffffff));
+
+	memset(&ah_attr, 0, sizeof ah_attr);
+	ah_attr.dlid          = be16_to_cpu(packet->mad.lid);
+	ah_attr.sl            = packet->mad.sl;
+	ah_attr.src_path_bits = packet->mad.path_bits;
+	ah_attr.port_num      = file->port->port_num;
+	/* XXX handle GRH */
+
+	packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
+	if (IS_ERR(packet->ah)) {
+		ret = PTR_ERR(packet->ah);
+		goto err_up;
+	}
+
+	gather_list.addr = pci_map_single(agent->device->dma_device,
+					  packet->mad.data,
+					  sizeof packet->mad.data,
+					  PCI_DMA_TODEVICE);
+	gather_list.length = sizeof packet->mad.data;
+	gather_list.lkey   = file->mr[packet->mad.id]->lkey;
+	pci_unmap_addr_set(packet, mapping, gather_list.addr);
+
+	wr.wr.ud.mad_hdr     = (struct ib_mad_hdr *) packet->mad.data;
+	wr.wr.ud.ah          = packet->ah;
+	wr.wr.ud.remote_qpn  = be32_to_cpu(packet->mad.qpn);
+	wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey);
+
+	wr.wr_id            = (unsigned long) packet;
+
+	ret = ib_post_send_mad(agent, &wr, &bad_wr);
+	if (ret) {
+		pci_unmap_single(agent->device->dma_device,
+				 pci_unmap_addr(packet, mapping),
+				 sizeof packet->mad.data,
+				 PCI_DMA_TODEVICE);
+		goto err_up;
+	}
+
+	up(&file->mutex);
+
+	return sizeof packet->mad;
+
+err_up:
+	up(&file->mutex);
+
+err:
+	kfree(packet);
+	return ret;
+}
+
+static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
+{
+	struct ib_umad_file *file = filp->private_data;
+
+	/* we will always be able to post a MAD send */
+	unsigned int mask = POLLOUT | POLLWRNORM;
+
+	poll_wait(filp, &file->recv_wait, wait);
+
+	if (!list_empty(&file->recv_list))
+		mask |= POLLIN | POLLRDNORM;
+
+	return mask;
+}
+
+static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
+{
+	struct ib_user_mad_reg_req ureq;
+	struct ib_mad_reg_req req;
+	struct ib_mad_agent *agent;
+	int agent_id;
+	int ret;
+
+	if (down_interruptible(&file->mutex))
+		return -EINTR;
+
+	if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (ureq.qpn != 0 && ureq.qpn != 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
+		if (!file->agent[agent_id])
+			goto found;
+
+	ret = -ENOMEM;
+	goto out;
+
+found:
+	req.mgmt_class         = ureq.mgmt_class;
+	req.mgmt_class_version = ureq.mgmt_class_version;
+	memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask);
+
+	agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
+				      ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
+				      &req, 0, send_handler, recv_handler,
+				      file);
+	if (IS_ERR(agent)) {
+		ret = PTR_ERR(agent);
+		goto out;
+	}
+
+	file->agent[agent_id] = agent;
+
+	file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(file->mr[agent_id])) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	if (put_user(agent_id,
+		     (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) {
+		ret = -EFAULT;
+		goto err_mr;
+	}
+
+	ret = 0;
+	goto out;
+
+err_mr:
+	ib_dereg_mr(file->mr[agent_id]);
+
+err:
+	file->agent[agent_id] = NULL;
+	ib_unregister_mad_agent(agent);
+
+out:
+	up(&file->mutex);
+	return ret;
+}
+
+static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
+{
+	u32 id;
+	int ret = 0;
+
+	if (down_interruptible(&file->mutex))
+		return -EINTR;
+
+	if (get_user(id, (u32 __user *) arg)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ib_dereg_mr(file->mr[id]);
+	ib_unregister_mad_agent(file->agent[id]);
+	file->agent[id] = NULL;
+
+out:
+	up(&file->mutex);
+	return ret;
+}
+
+static int ib_umad_ioctl(struct inode *inode, struct file *filp,
+			 unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case IB_USER_MAD_REGISTER_AGENT:
+		return ib_umad_reg_agent(filp->private_data, arg);
+	case IB_USER_MAD_UNREGISTER_AGENT:
+		return ib_umad_unreg_agent(filp->private_data, arg);
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
+static int ib_umad_open(struct inode *inode, struct file *filp)
+{
+	struct ib_umad_port *port =
+		container_of(inode->i_cdev, struct ib_umad_port, dev);
+	struct ib_umad_file *file;
+
+	file = kmalloc(sizeof *file, GFP_KERNEL);
+	if (!file)
+		return -ENOMEM;
+
+	memset(file, 0, sizeof *file);
+
+	init_MUTEX(&file->mutex);
+	INIT_LIST_HEAD(&file->recv_list);
+	init_waitqueue_head(&file->recv_wait);
+
+	file->port = port;
+	filp->private_data = file;
+
+	return 0;
+}
+
+static int ib_umad_close(struct inode *inode, struct file *filp)
+{
+	struct ib_umad_file *file = filp->private_data;
+	int i;
+
+	for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
+		if (file->agent[i]) {
+			ib_dereg_mr(file->mr[i]);
+			ib_unregister_mad_agent(file->agent[i]);
+		}
+
+	kfree(file);
+
+	return 0;
+}
+
+static struct file_operations umad_fops = {
+	.owner 	 = THIS_MODULE,
+	.read 	 = ib_umad_read,
+	.write 	 = ib_umad_write,
+	.poll 	 = ib_umad_poll,
+	.ioctl 	 = ib_umad_ioctl,
+	.open 	 = ib_umad_open,
+	.release = ib_umad_close
+};
+
+static struct ib_client umad_client = {
+	.name   = "umad",
+	.add    = ib_umad_add_one,
+	.remove = ib_umad_remove_one
+};
+
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
+{
+	struct ib_umad_port *port = class_get_devdata(class_dev);
+
+	return sprintf(buf, "%s\n", port->ib_dev->name);
+}
+CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+static ssize_t show_port(struct class_device *class_dev, char *buf)
+{
+	struct ib_umad_port *port = class_get_devdata(class_dev);
+
+	return sprintf(buf, "%d\n", port->port_num);
+}
+CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
+
+static void ib_umad_add_one(struct ib_device *device)
+{
+	struct ib_umad_device *umad_dev;
+	int s, e, i;
+
+	if (device->node_type == IB_NODE_SWITCH)
+		s = e = 0;
+	else {
+		struct ib_device_attr attr;
+		if (ib_query_device(device, &attr))
+			return;
+
+		s = 1;
+		e = attr.phys_port_cnt;
+	}
+
+	umad_dev = kmalloc(sizeof *umad_dev +
+			   (e - s + 1) * sizeof (struct ib_umad_port),
+			   GFP_KERNEL);
+	if (!umad_dev)
+		return;
+
+	umad_dev->start_port = s;
+	umad_dev->end_port   = e;
+
+	for (i = s; i <= e; ++i) {
+		spin_lock(&map_lock);
+		umad_dev->port[i - s].devnum =
+			find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+		if (umad_dev->port[i - s].devnum >= IB_UMAD_MAX_PORTS) {
+			spin_unlock(&map_lock);
+			goto err;
+		}
+		set_bit(umad_dev->port[i - s].devnum, dev_map);
+		spin_unlock(&map_lock);
+
+		umad_dev->port[i - s].ib_dev   = device;
+		umad_dev->port[i - s].port_num = i;
+
+		cdev_init(&umad_dev->port[i - s].dev, &umad_fops);
+		umad_dev->port[i - s].dev.owner = THIS_MODULE;
+		kobject_set_name(&umad_dev->port[i - s].dev.kobj,
+				 "umad%d", umad_dev->port[i - s].devnum);
+		if (cdev_add(&umad_dev->port[i - s].dev, base_dev +
+			     umad_dev->port[i - s].devnum, 1))
+			goto err;
+
+		umad_dev->port[i - s].class_dev =
+			class_simple_device_add(umad_class,
+						umad_dev->port[i - s].dev.dev,
+						&device->dma_device->dev,
+						"umad%d", umad_dev->port[i - s].devnum);
+		if (IS_ERR(umad_dev->port[i - s].class_dev))
+			goto err_class;
+
+		class_set_devdata(umad_dev->port[i - s].class_dev,
+				  &umad_dev->port[i - s]);
+
+		class_device_create_file(umad_dev->port[i - s].class_dev,
+					 &class_device_attr_ibdev);
+		class_device_create_file(umad_dev->port[i - s].class_dev,
+					 &class_device_attr_port);
+	}
+
+	ib_set_client_data(device, &umad_client, umad_dev);
+
+	return;
+
+err_class:
+	cdev_del(&umad_dev->port[i - s].dev);
+	clear_bit(umad_dev->port[i - s].devnum, dev_map);
+
+err:
+	while (--i >= s) {
+		class_simple_device_remove(umad_dev->port[i - s].dev.dev);
+		cdev_del(&umad_dev->port[i - s].dev);
+		clear_bit(umad_dev->port[i - s].devnum, dev_map);
+	}
+
+	kfree(umad_dev);
+}
+
+static void ib_umad_remove_one(struct ib_device *device)
+{
+	struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client);
+	int i;
+
+	if (!umad_dev)
+		return;
+
+	for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) {
+		class_simple_device_remove(umad_dev->port[i].dev.dev);
+		cdev_del(&umad_dev->port[i].dev);
+		clear_bit(umad_dev->port[i].devnum, dev_map);
+	}
+
+	kfree(umad_dev);
+}
+
+static int ib_umad_hotplug(struct class_device *dev, char **envp,
+			   int num_envp, char *buffer, int buffer_size)
+{
+	return 0;
+}
+
+static int __init ib_umad_init(void)
+{
+	int ret;
+
+	ret = alloc_chrdev_region(&base_dev, 0, IB_UMAD_MAX_PORTS,
+				  "infiniband_mad");
+	if (ret) {
+		printk(KERN_ERR "user_mad: couldn't get device number\n");
+		goto out;
+	}
+
+	umad_class = class_simple_create(THIS_MODULE, "infiniband_mad");
+	if (IS_ERR(umad_class)) {
+		printk(KERN_ERR "user_mad: couldn't create class_simple\n");
+		ret = PTR_ERR(umad_class);
+		goto out_chrdev;
+	}
+
+	ret = class_simple_set_hotplug(umad_class, ib_umad_hotplug);
+	if (ret) {
+		printk(KERN_ERR "user_mad: couldn't set class_simple hotplug\n");
+		goto out_class;
+	}
+
+	ret = ib_register_client(&umad_client);
+	if (ret) {
+		printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
+		goto out_class;
+	}
+		
+	return 0;
+
+out_class:
+	class_simple_destroy(umad_class);
+
+out_chrdev:
+	unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS);
+
+out:
+	return ret;
+}
+
+static void __exit ib_umad_cleanup(void)
+{
+	ib_unregister_client(&umad_client);
+	class_simple_destroy(umad_class);
+	unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS);
+}
+
+module_init(ib_umad_init);
+module_exit(ib_umad_cleanup);
Index: docs/user_mad.txt
===================================================================
--- docs/user_mad.txt	(revision 0)
+++ docs/user_mad.txt	(revision 0)
@@ -0,0 +1,70 @@
+USERSPACE MAD ACCESS
+
+Device files
+
+  Each port of each InfiniBand device has a "umad" device attached.
+  For example, a two-port HCA will have two devices, while a switch
+  will have one device (for switch port 0).
+
+Creating MAD agents
+
+  A MAD agent can be created by filling in a struct ib_user_mad_reg_req
+  and then calling the IB_USER_MAD_REGISTER_AGENT ioctl on a file
+  descriptor for the appropriate device file.  If the registration
+  request succeeds, a 32-bit id will be returned in the structure.
+  For example:
+
+	struct ib_user_mad_reg_req req = { /* ... */ };
+	ret = ioctl(fd, IB_USER_MAD_REGISTER_AGENT, (char *) &req);
+	if (!ret)
+		my_agent = req.id;
+	else
+		perror("agent register");
+
+  Agents can be unregistered with the IB_USER_MAD_UNREGISTER_AGENT
+  ioctl.  Also, all agents registered through a file descriptor will
+  be unregistered when the descriptor is closed.
+
+Receiving MADs
+
+  MADs are received using read().  The buffer passed to read() must be
+  large enough to hold at least one struct ib_user_mad.  For example:
+
+	struct ib_user_mad mad;
+	ret = read(fd, &mad, sizeof mad);
+	if (ret != sizeof mad)
+		perror("read");
+
+  In addition to the actual MAD contents, the other struct ib_user_mad
+  fields will be filled in with information on the received MAD.  For
+  example, the remote LID will be in mad.lid.
+
+  poll()/select() may be used to wait until a MAD can be read.
+
+Sending MADs
+
+  MADs are sent using write().  The agent ID for sending should be
+  filled into the id field of the MAD, the destination LID should be
+  filled into the lid field, and so on.  For example:
+
+	struct ib_user_mad mad;
+
+	/* fill in mad.data */
+
+	mad.id  = my_agent;	/* req.id from agent registration */
+	mad.lid = my_dest;	/* in network byte order... */
+	/* etc. */
+
+	ret = write(fd, &mad, sizeof mad);
+	if (ret != sizeof mad)
+		perror("write");
+
+/dev files
+
+  To create the appropriate character device files automatically with
+  udev, a rule like
+
+    KERNEL="umad*", NAME="infiniband/%s{ibdev}/umad%s{port}"
+
+  can be used.  This will create nodes such as /dev/infiniband/mthca0/umad1
+  for port 1 of device mthca0.
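
The docs above note that poll()/select() can be used to wait for an
incoming MAD, but don't show it combined with read(); here is a minimal
sketch of that receive path (fd is a descriptor that has been opened
and had an agent registered as described above; error handling is
abbreviated, and again the <ib_user_mad.h> include path is assumed):

	/* Hypothetical example, not part of the patch. */
	#include <poll.h>
	#include <stdio.h>
	#include <unistd.h>

	#include <ib_user_mad.h>

	static int recv_one_mad(int fd, struct ib_user_mad *mad)
	{
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		/* block until at least one MAD is queued on this descriptor */
		if (poll(&pfd, 1, -1) < 0) {
			perror("poll");
			return -1;
		}

		/* the buffer must hold a full struct ib_user_mad */
		if (read(fd, mad, sizeof *mad) != sizeof *mad) {
			perror("read");
			return -1;
		}

		/*
		 * mad->data now holds the MAD itself; mad->lid, mad->sl, etc.
		 * describe the sender, with multi-byte fields in network byte
		 * order.
		 */
		return 0;
	}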


