[openib-general] [PATCH v2] ib_usa: support userspace SA queries and multicast

Sean Hefty sean.hefty at intel.com
Thu Aug 24 17:10:00 PDT 2006


Changes from v1:

The ib_usa module exports two files: ib_usa_default and ib_usa_raw.

Use of ib_usa_default restricts the user to sending PathRecord,
MultiPathRecord, MCMemberRecord, and ServiceRecord queries, and joining /
leaving multicast groups.

Use of ib_usa_raw allows any MADs to be sent to the SA.

An administrator can set access controls on these files in any appropriate way.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
Index: include/rdma/ib_usa.h
===================================================================
--- include/rdma/ib_usa.h	(revision 0)
+++ include/rdma/ib_usa.h	(revision 0)
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USA_H
+#define IB_USA_H
+
+#include <linux/types.h>
+#include <rdma/ib_sa.h>
+
+#define IB_USA_ABI_VERSION	1
+
+#define IB_USA_EVENT_DATA	256
+
+enum {
+	IB_USA_CMD_SEND_MAD,	/* send a MAD to the SA */
+	IB_USA_CMD_GET_EVENT,	/* fetch the next queued event */
+	IB_USA_CMD_GET_DATA,	/* fetch attribute data of an oversized MAD */
+	IB_USA_CMD_JOIN_MCAST,	/* join a multicast group */
+	IB_USA_CMD_FREE_ID,	/* release a multicast id */
+	IB_USA_CMD_GET_MCAST	/* look up an MCMemberRecord by MGID */
+};
+
+enum {
+	IB_USA_EVENT_MAD,	/* result of an IB_USA_CMD_SEND_MAD request */
+	IB_USA_EVENT_MCAST	/* join result or later membership error */
+};
+
+struct ib_usa_cmd_hdr {
+	__u32 cmd;		/* IB_USA_CMD_* value */
+	__u16 in;		/* bytes of command data after this header */
+	__u16 out;		/* size of the user's response buffer */
+};
+
+struct ib_usa_send_mad {
+	__u64 response;		/* unused - reserved */
+	__u64 uid;		/* returned unchanged in the event */
+	__u64 node_guid;	/* selects the local device */
+	__u64 comp_mask;
+	__u64 attr;		/* user pointer to the packed attribute */
+	__u8  port_num;
+	__u8  method;
+	__be16 attr_id;
+	__u32 timeout_ms;
+	__u32 retries;
+};
+
+struct ib_usa_join_mcast {
+	__u64 response;
+	__u64 uid;		/* returned unchanged in the event */
+	__u64 node_guid;	/* selects the local device */
+	__u64 comp_mask;
+	__u64 mcmember_rec;	/* user pointer to a packed MCMemberRecord */
+	__u8  port_num;
+};
+
+struct ib_usa_id_resp {
+	__u32 id;		/* id assigned to the multicast join */
+};
+
+struct ib_usa_free_resp {
+	__u32 events_reported;	/* events delivered for the id before the free */
+};
+
+struct ib_usa_free_id {
+	__u64 response;		/* user pointer to struct ib_usa_free_resp */
+	__u32 id;		/* multicast id to release */
+};
+
+struct ib_usa_get_event {
+	__u64 response;		/* user pointer to struct ib_usa_event_resp */
+};
+
+struct ib_usa_event_resp {
+	__u64 uid;		/* uid supplied with the originating command */
+	__u32 id;
+	__u32 event;		/* IB_USA_EVENT_* value */
+	__u32 status;
+	__u32 data_len;		/* may exceed IB_USA_EVENT_DATA for large MADs */
+	__u8  data[IB_USA_EVENT_DATA];
+};
+
+struct ib_usa_get_data {
+	__u64 response;		/* user buffer receiving the attribute data */
+	__u32 id;		/* id reported in the MAD event */
+};
+
+struct ib_usa_get_mcast {
+	__u64 response;		/* user buffer for the packed MCMemberRecord */
+	__u64 node_guid;	/* selects the local device */
+	__u8  mgid[16];
+	__u8  port_num;
+};
+
+#endif /* IB_USA_H */
Index: core/usa.c
===================================================================
--- core/usa.c	(revision 0)
+++ core/usa.c	(revision 0)
@@ -0,0 +1,846 @@
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	copyright notice, this list of conditions and the following
+ *	disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	copyright notice, this list of conditions and the following
+ *	disclaimer in the documentation and/or other materials
+ *	provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/idr.h>
+#include <linux/miscdevice.h>
+
+#include <rdma/ib_usa.h>
+#include <rdma/ib_multicast.h>
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("IB userspace SA query");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static void usa_add_one(struct ib_device *device);
+static void usa_remove_one(struct ib_device *device);
+
+static struct ib_client usa_client = {
+	.name   = "ib_usa",
+	.add    = usa_add_one,		/* creates the per-device usa_device */
+	.remove = usa_remove_one	/* waits for users, then frees it */
+};
+
+struct usa_device {
+	struct list_head list;		/* entry in dev_list */
+	struct ib_device *device;
+	struct completion comp;		/* completed when refcount hits zero */
+	atomic_t refcount;
+	int start_port;			/* accepted port range, inclusive */
+	int end_port;
+};
+
+struct usa_file {
+	struct mutex		file_mutex;	/* guards lists and event_id */
+	struct file		*filp;
+	struct ib_sa_client	sa_client;
+	struct list_head	event_list;	/* events ready for GET_EVENT */
+	struct list_head	data_list;	/* MADs awaiting GET_DATA */
+	struct list_head	mcast_list;	/* this file's usa_multicast */
+	wait_queue_head_t	poll_wait;
+	int			event_id;	/* next id for a MAD event */
+};
+
+struct usa_event {
+	struct usa_file *file;
+	struct list_head list;		/* on file->event_list or data_list */
+	struct ib_usa_event_resp resp;
+	struct ib_mad_recv_wc *mad_recv_wc; /* kept only for oversized MADs */
+};
+
+struct usa_multicast {
+	struct usa_event event;		/* embedded; requeued for each report */
+	struct list_head list;		/* entry in file->mcast_list */
+	struct ib_multicast *multicast;
+	int events_reported;		/* times event was handed to userspace */
+};
+
+static DEFINE_MUTEX(usa_mutex);	/* guards dev_list and usa_idr */
+static LIST_HEAD(dev_list);
+static DEFINE_IDR(usa_idr);	/* multicast id -> struct usa_multicast */
+
+static struct usa_device *acquire_dev(__be64 guid, __u8 port_num)
+{
+	struct usa_device *cur, *match = NULL;
+
+	mutex_lock(&usa_mutex);
+	list_for_each_entry(cur, &dev_list, list) {
+		if (cur->device->node_guid != guid)
+			continue;
+		/* First GUID match decides; the port must be in its range. */
+		if (port_num >= cur->start_port && port_num <= cur->end_port) {
+			atomic_inc(&cur->refcount);
+			match = cur;
+		}
+		break;
+	}
+	mutex_unlock(&usa_mutex);
+	return match;
+}
+
+static void deref_dev(struct usa_device *dev)
+{
+	if (atomic_dec_and_test(&dev->refcount))
+		complete(&dev->comp);	/* last user gone: wake usa_remove_one() */
+}
+
+static int insert_obj(void *obj, int *id)
+{
+	int ret;
+
+	do {
+		/* idr_pre_get() returns 0 when it cannot preallocate memory. */
+		if (!idr_pre_get(&usa_idr, GFP_KERNEL))
+			return -ENOMEM;
+
+		mutex_lock(&usa_mutex);
+		ret = idr_get_new(&usa_idr, obj, id);
+		mutex_unlock(&usa_mutex);
+	} while (ret == -EAGAIN);	/* preallocation was consumed; retry */
+
+	return ret;
+}
+
+static void remove_obj(int id)
+{
+	mutex_lock(&usa_mutex);		/* usa_idr is shared by all files */
+	idr_remove(&usa_idr, id);
+	mutex_unlock(&usa_mutex);
+}
+
+static void finish_event(struct usa_event *event)
+{
+	struct usa_multicast *mcast;
+
+	switch (event->resp.event) {
+	case IB_USA_EVENT_MAD:		/* one-shot: park for GET_DATA or free */
+		list_del(&event->list);
+		if (event->resp.data_len > IB_USA_EVENT_DATA)
+			list_add_tail(&event->list, &event->file->data_list);
+		else
+			kfree(event);
+		break;
+	case IB_USA_EVENT_MCAST:	/* embedded in usa_multicast: not freed */
+		list_del_init(&event->list);
+		mcast = container_of(event, struct usa_multicast, event);
+		mcast->events_reported++;
+		break;
+	default:
+		break;
+	}
+}
+
+static ssize_t usa_get_event(struct usa_file *file, const char __user *inbuf,
+			      int in_len, int out_len)
+{
+	struct ib_usa_get_event cmd;
+	struct usa_event *event;
+	int ret = 0;
+	DEFINE_WAIT(wait);
+
+	if (out_len < sizeof(struct ib_usa_event_resp))
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	mutex_lock(&file->file_mutex);
+	while (list_empty(&file->event_list)) {
+		if (file->filp->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
+		mutex_unlock(&file->file_mutex);	/* let handlers queue events */
+		schedule();
+		mutex_lock(&file->file_mutex);
+		finish_wait(&file->poll_wait, &wait);
+	}
+
+	if (ret)
+		goto done;
+
+	event = list_entry(file->event_list.next, struct usa_event, list);
+
+	if (copy_to_user((void __user *)(unsigned long)cmd.response,
+			 &event->resp, sizeof(event->resp))) {
+		ret = -EFAULT;		/* event stays queued */
+		goto done;
+	}
+
+	finish_event(event);		/* dequeue only after a successful copy */
+done:
+	mutex_unlock(&file->file_mutex);
+	return ret;
+}
+
+static struct usa_event *get_event_data(struct usa_file *file, __u32 id)
+{
+	struct usa_event *event;
+
+	mutex_lock(&file->file_mutex);
+	list_for_each_entry(event, &file->data_list, list) {
+		if (event->resp.id == id) {
+			list_del(&event->list);	/* caller now owns the event */
+			mutex_unlock(&file->file_mutex);
+			return event;
+		}
+	}
+	mutex_unlock(&file->file_mutex);
+	return NULL;			/* no parked MAD with this id */
+}
+
+static int copy_event_data(struct usa_event *event, __u64 response)
+{
+	struct ib_sa_mad *mad;
+	struct ib_sa_iter *iter;
+	int attr_offset, ret = 0;
+	void *attr;
+
+	mad = (struct ib_sa_mad *) event->mad_recv_wc->recv_buf.mad;
+	attr_offset = be16_to_cpu(mad->sa_hdr.attr_offset) * 8;
+
+	iter = ib_sa_iter_create(event->mad_recv_wc); /* NOTE(review): unchecked */
+	while ((attr = ib_sa_iter_next(iter))) {
+		if (copy_to_user((void __user *) (unsigned long) response,
+				 attr, attr_offset)) {
+			ret = -EFAULT;
+			break;
+		}
+		response += attr_offset;	/* next record follows directly */
+	}
+
+	ib_sa_iter_free(iter);
+	return ret;
+}
+
+static ssize_t usa_get_data(struct usa_file *file, const char __user *inbuf,
+			    int in_len, int out_len)
+{
+	struct ib_usa_get_data cmd;
+	struct usa_event *event;
+	int ret = 0;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	event = get_event_data(file, cmd.id);	/* unlinks it from data_list */
+	if (!event)
+		return -EINVAL;
+
+	if (out_len < event->resp.data_len) {
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	ret = copy_event_data(event, cmd.response);
+out:
+	ib_free_recv_mad(event->mad_recv_wc);
+	kfree(event);			/* consumed even when -ENOSPC is returned */
+	return ret;
+}
+
+static void usa_req_handler(int status, struct ib_mad_recv_wc *mad_recv_wc,
+			    void *context)
+{
+	struct usa_event *event = context;	/* allocated by send_mad() */
+
+	if (mad_recv_wc) {
+		event->resp.data_len = mad_recv_wc->mad_len;
+
+		if (event->resp.data_len <= IB_USA_EVENT_DATA) {
+			memcpy(event->resp.data, mad_recv_wc->recv_buf.mad,
+			       event->resp.data_len);
+			ib_free_recv_mad(mad_recv_wc);	/* fully copied */
+		} else {
+			event->mad_recv_wc = mad_recv_wc; /* kept for GET_DATA */
+			memcpy(event->resp.data, mad_recv_wc->recv_buf.mad,
+			       IB_USA_EVENT_DATA);
+		}
+	}
+
+	event->resp.status = status;
+
+	mutex_lock(&event->file->file_mutex);
+	list_add_tail(&event->list, &event->file->event_list);
+	wake_up_interruptible(&event->file->poll_wait);
+	mutex_unlock(&event->file->file_mutex);
+}
+
+static int send_mad(struct usa_file *file, struct ib_usa_send_mad *cmd)
+{
+	struct usa_device *dev;
+	struct usa_event *event;
+	struct ib_sa_query *query;
+	int attr_size, ret;
+
+	attr_size = ib_sa_attr_size(cmd->attr_id);
+	if (!attr_size)
+		return -EINVAL;
+
+	dev = acquire_dev(cmd->node_guid, cmd->port_num);
+	if (!dev)
+		return -ENODEV;
+
+	event = kzalloc(sizeof *event, GFP_KERNEL);
+	if (!event) {
+		ret = -ENOMEM;
+		goto deref;
+	}
+
+	if (copy_from_user(event->resp.data,	/* request attribute staging */
+			   (void __user *) (unsigned long) cmd->attr,
+			   attr_size)) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+	event->file = file;
+	event->resp.event = IB_USA_EVENT_MAD;
+	event->resp.uid = cmd->uid;
+
+	mutex_lock(&file->file_mutex);
+	event->resp.id = file->event_id++;	/* id matched by GET_DATA */
+	mutex_unlock(&file->file_mutex);
+
+	ret = ib_sa_send_mad(&file->sa_client, dev->device, cmd->port_num,
+			     cmd->method, event->resp.data, cmd->attr_id,
+			     (ib_sa_comp_mask) cmd->comp_mask,
+			     cmd->timeout_ms, cmd->retries, GFP_KERNEL,
+			     usa_req_handler, event, &query);
+	if (ret < 0)
+		goto free;
+
+	deref_dev(dev);
+	return 0;		/* event is now queued by usa_req_handler() */
+free:
+	kfree(event);
+deref:
+	deref_dev(dev);
+	return ret;
+}
+
+static ssize_t usa_send_mad(struct usa_file *file, const char __user *inbuf,
+			    int in_len, int out_len)
+{
+	struct ib_usa_send_mad cmd;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	return send_mad(file, &cmd);	/* no method/attribute filtering */
+}
+
+static ssize_t usa_query(struct usa_file *file, const char __user *inbuf,
+			 int in_len, int out_len)
+{
+	struct ib_usa_send_mad cmd;
+	uint16_t attr_id;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	attr_id = be16_to_cpu(cmd.attr_id);
+
+	switch (cmd.method) {		/* allow only the listed queries */
+	case IB_MGMT_METHOD_GET:
+	case IB_SA_METHOD_GET_TABLE:
+		switch (attr_id) {
+		case IB_SA_ATTR_PATH_REC:
+		case IB_SA_ATTR_MC_MEMBER_REC:
+		case IB_SA_ATTR_SERVICE_REC:
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case IB_SA_METHOD_GET_MULTI:	/* valid for MultiPathRecord only */
+		if (attr_id != IB_SA_ATTR_MULTI_PATH_REC)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return send_mad(file, &cmd);
+}
+
+/*
+ * We can get up to two events for a single multicast member.  A second event
+ * only occurs if there's an error on an existing multicast membership.
+ * Report only the last event.
+ */
+static int multicast_handler(int status, struct ib_multicast *multicast)
+{
+	struct usa_multicast *mcast = multicast->context;
+
+	if (!status) {
+		mcast->event.resp.data_len = IB_SA_ATTR_MC_MEMBER_REC_LEN;
+		ib_sa_pack_attr(mcast->event.resp.data, &multicast->rec,
+				IB_SA_ATTR_MC_MEMBER_REC);
+	}
+
+	mutex_lock(&mcast->event.file->file_mutex);
+	mcast->event.resp.status = status;
+
+	list_del(&mcast->event.list);	/* drop an earlier, unread report */
+	list_add_tail(&mcast->event.list, &mcast->event.file->event_list);
+	wake_up_interruptible(&mcast->event.file->poll_wait);
+	mutex_unlock(&mcast->event.file->file_mutex);
+	return 0;
+}
+
+static ssize_t usa_join_mcast(struct usa_file *file, const char __user *inbuf,
+			      int in_len, int out_len)
+{
+	struct usa_device *dev;
+	struct usa_multicast *mcast;
+	struct ib_usa_join_mcast cmd;
+	struct ib_usa_id_resp resp;
+	struct ib_sa_mcmember_rec rec;
+	int ret;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	dev = acquire_dev(cmd.node_guid, cmd.port_num);
+	if (!dev)
+		return -ENODEV;
+
+	mcast = kzalloc(sizeof *mcast, GFP_KERNEL);
+	if (!mcast) {
+		ret = -ENOMEM;
+		goto deref;
+	}
+	INIT_LIST_HEAD(&mcast->event.list);	/* list_del() safe when idle */
+	mcast->event.file = file;
+	mcast->event.resp.event = IB_USA_EVENT_MCAST;
+	mcast->event.resp.uid = cmd.uid;
+
+	ret = insert_obj(mcast, &mcast->event.resp.id);
+	if (ret)
+		goto free;
+
+	resp.id = mcast->event.resp.id;	/* NOTE(review): never copied to user */
+
+	mutex_lock(&file->file_mutex);
+	list_add_tail(&mcast->list, &file->mcast_list);
+	mutex_unlock(&file->file_mutex);
+
+	if (copy_from_user(mcast->event.resp.data,
+			   (void __user *) (unsigned long) cmd.mcmember_rec,
+			   IB_SA_ATTR_MC_MEMBER_REC_LEN)) {
+		ret = -EFAULT;
+		goto remove;
+	}
+
+	ib_sa_unpack_attr(&rec, mcast->event.resp.data,
+			  IB_SA_ATTR_MC_MEMBER_REC);
+	mcast->multicast = ib_join_multicast(dev->device, cmd.port_num, &rec,
+					     (ib_sa_comp_mask) cmd.comp_mask,
+					     GFP_KERNEL, multicast_handler,
+					     mcast);
+	if (IS_ERR(mcast->multicast)) {
+		ret = PTR_ERR(mcast->multicast);
+		goto remove;
+	}
+
+	deref_dev(dev);
+	return 0;		/* result arrives as an IB_USA_EVENT_MCAST event */
+remove:
+	mutex_lock(&file->file_mutex);
+	list_del(&mcast->list);
+	mutex_unlock(&file->file_mutex);
+	remove_obj(mcast->event.resp.id);
+free:
+	kfree(mcast);
+deref:
+	deref_dev(dev);
+	return ret;
+}
+
+static ssize_t usa_free_id(struct usa_file *file, const char __user *inbuf,
+			   int in_len, int out_len)
+{
+	struct ib_usa_free_id cmd;
+	struct ib_usa_free_resp resp;
+	struct usa_multicast *mcast;
+	int ret = 0;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	mutex_lock(&usa_mutex);
+	mcast = idr_find(&usa_idr, cmd.id);
+	if (!mcast)
+		mcast = ERR_PTR(-ENOENT);
+	else if (mcast->event.file != file)	/* ids are shared by all files */
+		mcast = ERR_PTR(-EINVAL);
+	else
+		idr_remove(&usa_idr, mcast->event.resp.id);
+	mutex_unlock(&usa_mutex);
+
+	if (IS_ERR(mcast))
+		return PTR_ERR(mcast);
+
+	ib_free_multicast(mcast->multicast);
+	mutex_lock(&file->file_mutex);
+	list_del(&mcast->list);
+	list_del(&mcast->event.list);	/* drop an unread event before kfree */
+	mutex_unlock(&file->file_mutex);
+
+	resp.events_reported = mcast->events_reported;
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+	kfree(mcast);
+	return ret;
+}
+
+static ssize_t usa_get_mcast(struct usa_file *file, const char __user *inbuf,
+			     int in_len, int out_len)
+{
+	struct usa_device *dev;
+	struct ib_usa_get_mcast cmd;
+	struct ib_sa_mcmember_rec rec;
+	u8 mcmember_rec[IB_SA_ATTR_MC_MEMBER_REC_LEN];
+	int ret;
+
+	if (out_len < IB_SA_ATTR_MC_MEMBER_REC_LEN)	/* full packed record */
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	dev = acquire_dev(cmd.node_guid, cmd.port_num);
+	if (!dev)
+		return -ENODEV;
+
+	ret = ib_get_mcmember_rec(dev->device, cmd.port_num,
+				  (union ib_gid *) cmd.mgid, &rec);
+	if (!ret) {
+		ib_sa_pack_attr(mcmember_rec, &rec, IB_SA_ATTR_MC_MEMBER_REC);
+		if (copy_to_user((void __user *) (unsigned long) cmd.response,
+				 mcmember_rec, IB_SA_ATTR_MC_MEMBER_REC_LEN))
+			ret = -EFAULT;
+	}
+
+	deref_dev(dev);		/* device reference only spans the lookup */
+	return ret;
+}
+
+static ssize_t (*usa_cmd_table[])(struct usa_file *file,
+				   const char __user *inbuf,
+				   int in_len, int out_len) = {
+	[IB_USA_CMD_SEND_MAD]	= usa_query,	/* filtered; raw write bypasses */
+	[IB_USA_CMD_GET_EVENT]	= usa_get_event,
+	[IB_USA_CMD_GET_DATA]	= usa_get_data,
+	[IB_USA_CMD_JOIN_MCAST]	= usa_join_mcast,
+	[IB_USA_CMD_FREE_ID]	= usa_free_id,
+	[IB_USA_CMD_GET_MCAST]	= usa_get_mcast,
+};
+
+static ssize_t usa_raw_write(struct file *filp, const char __user *buf,
+			     size_t len, loff_t *pos)
+{
+	struct usa_file *file = filp->private_data;
+	struct ib_usa_cmd_hdr hdr;
+	ssize_t ret;
+
+	if (len < sizeof(hdr))
+		return -EINVAL;
+
+	if (copy_from_user(&hdr, buf, sizeof(hdr)))
+		return -EFAULT;
+
+	if (hdr.cmd >= ARRAY_SIZE(usa_cmd_table))	/* cmd is unsigned */
+		return -EINVAL;
+
+	if (hdr.in + sizeof(hdr) > len)		/* payload must fit this write */
+		return -EINVAL;
+
+	if (hdr.cmd == IB_USA_CMD_SEND_MAD)	/* raw file: skip query filter */
+		ret = usa_send_mad(file, buf + sizeof(hdr), hdr.in, hdr.out);
+	else
+		ret = usa_cmd_table[hdr.cmd](file, buf + sizeof(hdr),
+					     hdr.in, hdr.out);
+	if (!ret)				/* handlers return 0 on success */
+		ret = len;
+
+	return ret;
+}
+
+static ssize_t usa_default_write(struct file *filp, const char __user *buf,
+				 size_t len, loff_t *pos)
+{
+	struct usa_file *file = filp->private_data;
+	struct ib_usa_cmd_hdr hdr;
+	ssize_t ret;
+
+	if (len < sizeof(hdr))
+		return -EINVAL;
+
+	if (copy_from_user(&hdr, buf, sizeof(hdr)))
+		return -EFAULT;
+
+	if (hdr.cmd >= ARRAY_SIZE(usa_cmd_table))	/* cmd is unsigned */
+		return -EINVAL;
+
+	if (hdr.in + sizeof(hdr) > len)		/* payload must fit this write */
+		return -EINVAL;
+
+	ret = usa_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
+	if (!ret)				/* handlers return 0 on success */
+		ret = len;
+
+	return ret;
+}
+
+static unsigned int usa_poll(struct file *filp, struct poll_table_struct *wait)
+{
+	struct usa_file *ufile = filp->private_data;
+
+	/* Hook into the wait queue before sampling the event list. */
+	poll_wait(filp, &ufile->poll_wait, wait);
+
+	if (list_empty(&ufile->event_list))
+		return 0;
+
+	return POLLIN | POLLRDNORM;	/* an event is ready for GET_EVENT */
+}
+
+static int usa_open(struct inode *inode, struct file *filp)
+{
+	struct usa_file *file;
+
+	/* Zeroed: event_id is never set explicitly and must start defined. */
+	file = kzalloc(sizeof *file, GFP_KERNEL);
+	if (!file)
+		return -ENOMEM;
+	ib_sa_register_client(&file->sa_client);
+
+	INIT_LIST_HEAD(&file->event_list);
+	INIT_LIST_HEAD(&file->data_list);
+	INIT_LIST_HEAD(&file->mcast_list);
+	init_waitqueue_head(&file->poll_wait);
+	mutex_init(&file->file_mutex);
+
+	filp->private_data = file;
+	file->filp = filp;	/* back pointer, read for O_NONBLOCK */
+	return 0;
+}
+
+static void cleanup_events(struct list_head *list)
+{
+	struct usa_event *event;
+
+	while (!list_empty(list)) {
+		event = list_entry(list->next, struct usa_event, list);
+		list_del(&event->list);
+
+		if (event->mad_recv_wc)	/* set only for oversized MADs */
+			ib_free_recv_mad(event->mad_recv_wc);
+
+		kfree(event);
+	}
+}
+
+static void cleanup_mcast(struct usa_file *file)
+{
+	struct usa_multicast *mcast;
+
+	while (!list_empty(&file->mcast_list)) {
+		mcast = list_entry(file->mcast_list.next,
+				   struct usa_multicast, list);
+		list_del(&mcast->list);
+
+		remove_obj(mcast->event.resp.id);	/* drop the idr entry */
+
+		ib_free_multicast(mcast->multicast);
+
+		/*
+		 * Other members may still be generating events, so we need
+		 * to lock the event list to avoid corrupting it.
+		 */
+		mutex_lock(&file->file_mutex);
+		list_del(&mcast->event.list);	/* unlink any unread event */
+		mutex_unlock(&file->file_mutex);
+
+		kfree(mcast);
+	}
+}
+
+static int usa_close(struct inode *inode, struct file *filp)
+{
+	struct usa_file *file = filp->private_data;
+
+	ib_sa_unregister_client(&file->sa_client);
+	cleanup_mcast(file);	/* must run before the event lists are freed */
+
+	cleanup_events(&file->event_list);	/* events never read */
+	cleanup_events(&file->data_list);	/* MADs never fetched */
+	kfree(file);
+	return 0;
+}
+
+static void usa_add_one(struct ib_device *device)
+{
+	struct usa_device *dev;
+
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	dev = kmalloc(sizeof *dev, GFP_KERNEL);
+	if (!dev)
+		return;
+
+	dev->device = device;
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
+		dev->start_port = dev->end_port = 0;	/* only port 0 accepted */
+	else {
+		dev->start_port = 1;
+		dev->end_port = device->phys_port_cnt;
+	}
+
+	init_completion(&dev->comp);
+	atomic_set(&dev->refcount, 1);	/* dropped in usa_remove_one() */
+	ib_set_client_data(device, &usa_client, dev);
+
+	mutex_lock(&usa_mutex);
+	list_add_tail(&dev->list, &dev_list);	/* now visible to acquire_dev() */
+	mutex_unlock(&usa_mutex);
+}
+
+static void usa_remove_one(struct ib_device *device)
+{
+	struct usa_device *dev;
+
+	dev = ib_get_client_data(device, &usa_client);
+	if (!dev)
+		return;
+
+	mutex_lock(&usa_mutex);
+	list_del(&dev->list);		/* no new acquire_dev() references */
+	mutex_unlock(&usa_mutex);
+
+	deref_dev(dev);			/* drop the initial reference */
+	wait_for_completion(&dev->comp);	/* wait for remaining users */
+	kfree(dev);
+}
+
+static struct file_operations usa_raw_fops = {
+	.owner 	 = THIS_MODULE,
+	.open 	 = usa_open,
+	.release = usa_close,
+	.write	 = usa_raw_write,	/* SEND_MAD is not filtered */
+	.poll    = usa_poll,
+};
+
+static struct miscdevice usa_raw_misc = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "ib_usa_raw",		/* misc device for unrestricted MADs */
+	.fops	= &usa_raw_fops,
+};
+
+static struct file_operations usa_default_fops = {
+	.owner 	 = THIS_MODULE,
+	.open 	 = usa_open,
+	.release = usa_close,
+	.write	 = usa_default_write,	/* SEND_MAD goes through usa_query() */
+	.poll    = usa_poll,
+};
+
+static struct miscdevice usa_default_misc = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "ib_usa_default",	/* misc device with filtered queries */
+	.fops	= &usa_default_fops,
+};
+
+static ssize_t show_abi_version(struct class_device *class_dev, char *buf)
+{
+	return sprintf(buf, "%d\n", IB_USA_ABI_VERSION);	/* decimal + newline */
+}
+static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static int __init usa_init(void)
+{
+	int ret;
+
+	ret = misc_register(&usa_raw_misc);
+	if (ret)
+		return ret;
+
+	ret = misc_register(&usa_default_misc);
+	if (ret)
+		goto err1;
+
+	ret = class_device_create_file(usa_default_misc.class,
+				       &class_device_attr_abi_version);
+	if (ret)
+		goto err2;
+
+	ret = ib_register_client(&usa_client);	/* triggers usa_add_one() calls */
+	if (ret)
+		goto err3;
+	return 0;
+
+err3:				/* unwind in reverse order of setup */
+	class_device_remove_file(usa_default_misc.class, 
+				 &class_device_attr_abi_version);
+err2:
+	misc_deregister(&usa_default_misc);
+err1:
+	misc_deregister(&usa_raw_misc);
+	return ret;
+}
+
+static void __exit usa_cleanup(void)
+{
+	ib_unregister_client(&usa_client);	/* reverse order of usa_init() */
+	class_device_remove_file(usa_default_misc.class, 
+				 &class_device_attr_abi_version);
+	misc_deregister(&usa_default_misc);
+	misc_deregister(&usa_raw_misc);
+	idr_destroy(&usa_idr);
+}
+
+module_init(usa_init);
+module_exit(usa_cleanup);
Index: Kconfig
===================================================================
--- Kconfig	(revision 9096)
+++ Kconfig	(working copy)
@@ -17,15 +17,15 @@ config INFINIBAND_USER_MAD
 	  need libibumad from <http://www.openib.org>.
 
 config INFINIBAND_USER_ACCESS
-	tristate "InfiniBand userspace access (verbs and CM)"
+	tristate "InfiniBand userspace access (verbs, CM, SA client)"
 	depends on INFINIBAND
 	---help---
 	  Userspace InfiniBand access support.  This enables the
-	  kernel side of userspace verbs and the userspace
-	  communication manager (CM).  This allows userspace processes
-	  to set up connections and directly access InfiniBand
+	  kernel side of userspace verbs, the userspace communication
+	  manager (CM), and userspace SA client.  This allows userspace
+	  processes to set up connections and directly access InfiniBand
 	  hardware for fast-path operations.  You will also need
-	  libibverbs, libibcm and a hardware driver library from
+	  libibverbs, libibcm, libibsa, and a hardware driver library from
 	  <http://www.openib.org>.
 
 config INFINIBAND_ADDR_TRANS
Index: core/Makefile
===================================================================
--- core/Makefile	(revision 9096)
+++ core/Makefile	(working copy)
@@ -7,7 +7,8 @@ obj-$(CONFIG_INFINIBAND) +=		ib_core.o i
 					ib_sa.o $(infiniband-y) \
 					findex.o ib_multicast.o
 obj-$(CONFIG_INFINIBAND_USER_MAD) += 	ib_umad.o
-obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o $(user_access-y)
+obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o ib_usa.o \
+					$(user_access-y)
 
 findex-y :=			index.o
 
@@ -39,3 +40,5 @@ ib_uverbs-y :=			uverbs_main.o uverbs_cm
 
 ib_ucm-y :=			ucm.o
 
+ib_usa-y :=			usa.o
+





More information about the general mailing list