[openib-general] [PATCH v2] ib_usa: support userspace SA queries and multicast
Sean Hefty
sean.hefty at intel.com
Thu Aug 24 17:10:00 PDT 2006
Changes from v1:
The ib_usa module exports two files: ib_usa_default and ib_usa_raw.
Using ib_usa_default restricts the user to sending PathRecord,
MultiPathRecord, MCMemberRecord, and ServiceRecord queries, and to joining /
leaving multicast groups.
Using ib_usa_raw allows any MAD to be sent to the SA.
An administrator can set access permissions on these files as appropriate.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
Index: include/rdma/ib_usa.h
===================================================================
--- include/rdma/ib_usa.h (revision 0)
+++ include/rdma/ib_usa.h (revision 0)
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USA_H
+#define IB_USA_H
+
+#include <linux/types.h>
+#include <rdma/ib_sa.h>
+
+#define IB_USA_ABI_VERSION 1
+
+#define IB_USA_EVENT_DATA 256
+
+/*
+ * Command codes placed in ib_usa_cmd_hdr.cmd.  core/usa.c uses the value
+ * directly as an index into its command handler table, so the numeric
+ * order of these entries is part of the userspace ABI.
+ */
+enum {
+ IB_USA_CMD_SEND_MAD,
+ IB_USA_CMD_GET_EVENT,
+ IB_USA_CMD_GET_DATA,
+ IB_USA_CMD_JOIN_MCAST,
+ IB_USA_CMD_FREE_ID,
+ IB_USA_CMD_GET_MCAST
+};
+
+/* Event types reported in ib_usa_event_resp.event. */
+enum {
+ IB_USA_EVENT_MAD,
+ IB_USA_EVENT_MCAST
+};
+
+/*
+ * Header that starts every write() to the ib_usa files.
+ *   cmd - one of the IB_USA_CMD_* codes
+ *   in  - length in bytes of the command payload that follows the header
+ *   out - length in bytes of the caller's response buffer
+ */
+struct ib_usa_cmd_hdr {
+ __u32 cmd;
+ __u16 in;
+ __u16 out;
+};
+
+/*
+ * Payload for IB_USA_CMD_SEND_MAD.
+ *   uid        - caller cookie, echoed back in ib_usa_event_resp.uid
+ *   node_guid  - GUID of the device to send on (together with port_num)
+ *   comp_mask  - SA component mask passed to the SA request
+ *   attr       - user pointer to the attribute data to send
+ *   method     - SA method
+ *   attr_id    - SA attribute id, big-endian
+ *   timeout_ms - request timeout handed to the SA layer
+ *   retries    - retry count handed to the SA layer
+ */
+struct ib_usa_send_mad {
+ __u64 response; /* unused - reserved */
+ __u64 uid;
+ __u64 node_guid;
+ __u64 comp_mask;
+ __u64 attr;
+ __u8 port_num;
+ __u8 method;
+ __be16 attr_id;
+ __u32 timeout_ms;
+ __u32 retries;
+};
+
+/*
+ * Payload for IB_USA_CMD_JOIN_MCAST.
+ *   response     - user pointer for the reply; presumably a
+ *                  struct ib_usa_id_resp (TODO confirm against core/usa.c)
+ *   uid          - caller cookie, echoed back in ib_usa_event_resp.uid
+ *   node_guid    - GUID of the device to join on (together with port_num)
+ *   comp_mask    - SA component mask for the MCMemberRecord
+ *   mcmember_rec - user pointer to a packed MCMemberRecord
+ */
+struct ib_usa_join_mcast {
+ __u64 response;
+ __u64 uid;
+ __u64 node_guid;
+ __u64 comp_mask;
+ __u64 mcmember_rec;
+ __u8 port_num;
+};
+
+/*
+ * Reply carrying the identifier assigned to a multicast join.  The same
+ * value is what IB_USA_CMD_FREE_ID expects in ib_usa_free_id.id.
+ */
+struct ib_usa_id_resp {
+ __u32 id;
+};
+
+/*
+ * Reply to IB_USA_CMD_FREE_ID: the number of events for the freed id that
+ * were handed to userspace through IB_USA_CMD_GET_EVENT.
+ */
+struct ib_usa_free_resp {
+ __u32 events_reported;
+};
+
+/*
+ * Payload for IB_USA_CMD_FREE_ID.
+ *   response - user pointer to a struct ib_usa_free_resp
+ *   id       - identifier of the multicast join to release
+ */
+struct ib_usa_free_id {
+ __u64 response;
+ __u32 id;
+};
+
+/*
+ * Payload for IB_USA_CMD_GET_EVENT.
+ *   response - user pointer to a struct ib_usa_event_resp to fill in
+ */
+struct ib_usa_get_event {
+ __u64 response;
+};
+
+/*
+ * Event record returned by IB_USA_CMD_GET_EVENT.
+ *   uid      - cookie supplied with the originating command
+ *   id       - identifier of the request or multicast join
+ *   event    - IB_USA_EVENT_MAD or IB_USA_EVENT_MCAST
+ *   status   - completion status reported by the SA / multicast layer
+ *   data_len - length of the response data; when it exceeds
+ *              IB_USA_EVENT_DATA only the first IB_USA_EVENT_DATA bytes are
+ *              in data[] and the full response is fetched with
+ *              IB_USA_CMD_GET_DATA
+ *   data     - response data (or its leading part)
+ */
+struct ib_usa_event_resp {
+ __u64 uid;
+ __u32 id;
+ __u32 event;
+ __u32 status;
+ __u32 data_len;
+ __u8 data[IB_USA_EVENT_DATA];
+};
+
+/*
+ * Payload for IB_USA_CMD_GET_DATA.
+ *   response - user pointer to the buffer that receives the attributes
+ *   id       - id of the MAD event whose full response is requested
+ */
+struct ib_usa_get_data {
+ __u64 response;
+ __u32 id;
+};
+
+/*
+ * Payload for IB_USA_CMD_GET_MCAST.
+ *   response  - user pointer to a buffer that receives a packed
+ *               MCMemberRecord
+ *   node_guid - GUID of the device to query (together with port_num)
+ *   mgid      - multicast GID to look up
+ */
+struct ib_usa_get_mcast {
+ __u64 response;
+ __u64 node_guid;
+ __u8 mgid[16];
+ __u8 port_num;
+};
+
+#endif /* IB_USA_H */
Index: core/usa.c
===================================================================
--- core/usa.c (revision 0)
+++ core/usa.c (revision 0)
@@ -0,0 +1,846 @@
+/*
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/idr.h>
+#include <linux/miscdevice.h>
+
+#include <rdma/ib_usa.h>
+#include <rdma/ib_multicast.h>
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("IB userspace SA query");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static void usa_add_one(struct ib_device *device);
+static void usa_remove_one(struct ib_device *device);
+
+/* IB client registration: device add/remove notifications for this module. */
+static struct ib_client usa_client = {
+ .name = "ib_usa",
+ .add = usa_add_one,
+ .remove = usa_remove_one
+};
+
+/*
+ * Per-IB-device state, linked on dev_list.
+ *   comp     - completed when refcount drops to zero; usa_remove_one()
+ *              waits on it before freeing the structure
+ *   refcount - starts at 1; taken by acquire_dev(), dropped by deref_dev()
+ *   start_port / end_port - inclusive range of valid port numbers
+ */
+struct usa_device {
+ struct list_head list;
+ struct ib_device *device;
+ struct completion comp;
+ atomic_t refcount;
+ int start_port;
+ int end_port;
+};
+
+/*
+ * Per-open-file state.
+ *   file_mutex - serializes access to the lists below and to event_id
+ *   event_list - events ready to be returned by IB_USA_CMD_GET_EVENT
+ *   data_list  - reported MAD events whose response was larger than
+ *                IB_USA_EVENT_DATA, kept until IB_USA_CMD_GET_DATA
+ *   mcast_list - multicast joins created through this file
+ *   poll_wait  - woken whenever an event is queued
+ *   event_id   - counter used to assign ids to MAD events
+ */
+struct usa_file {
+ struct mutex file_mutex;
+ struct file *filp;
+ struct ib_sa_client sa_client;
+ struct list_head event_list;
+ struct list_head data_list;
+ struct list_head mcast_list;
+ wait_queue_head_t poll_wait;
+ int event_id;
+};
+
+/*
+ * One event to report to userspace.
+ *   list        - entry on the owning file's event_list or data_list
+ *   resp        - record copied to userspace by IB_USA_CMD_GET_EVENT
+ *   mad_recv_wc - received MAD, held only when the response did not fit in
+ *                 resp.data; NULL otherwise
+ */
+struct usa_event {
+ struct usa_file *file;
+ struct list_head list;
+ struct ib_usa_event_resp resp;
+ struct ib_mad_recv_wc *mad_recv_wc;
+};
+
+/*
+ * State for one multicast join made through a file.
+ *   event           - embedded event, re-queued each time the join reports
+ *   list            - entry on the owning file's mcast_list
+ *   multicast       - handle returned by ib_join_multicast()
+ *   events_reported - number of times the event was delivered to userspace
+ */
+struct usa_multicast {
+ struct usa_event event;
+ struct list_head list;
+ struct ib_multicast *multicast;
+ int events_reported;
+};
+
+/* usa_mutex protects both dev_list and usa_idr. */
+static DEFINE_MUTEX(usa_mutex);
+/* All usa_device instances currently registered. */
+static LIST_HEAD(dev_list);
+/* Maps multicast ids handed to userspace to their usa_multicast. */
+static DEFINE_IDR(usa_idr);
+
+/*
+ * Look up the device with the given node GUID and take a reference on it.
+ * Returns NULL when no device matches or when port_num lies outside the
+ * device's port range.  The caller releases the reference with deref_dev().
+ */
+static struct usa_device *acquire_dev(__be64 guid, __u8 port_num)
+{
+	struct usa_device *cur, *found = NULL;
+
+	mutex_lock(&usa_mutex);
+	list_for_each_entry(cur, &dev_list, list) {
+		if (cur->device->node_guid != guid)
+			continue;
+
+		/* The first GUID match decides the result, valid port or not. */
+		if (port_num >= cur->start_port && port_num <= cur->end_port) {
+			atomic_inc(&cur->refcount);
+			found = cur;
+		}
+		break;
+	}
+	mutex_unlock(&usa_mutex);
+
+	return found;
+}
+
+/*
+ * Drop one reference on a device; the final put signals dev->comp so that
+ * usa_remove_one() can finish tearing the device down.
+ */
+static void deref_dev(struct usa_device *dev)
+{
+	if (!atomic_dec_and_test(&dev->refcount))
+		return;
+
+	complete(&dev->comp);
+}
+
+/*
+ * Allocate an id for obj in usa_idr and store it in *id.
+ *
+ * Returns 0 on success or a negative errno.  idr_pre_get() reports an
+ * allocation failure by returning 0, which must not be passed back to the
+ * caller as "success": it is translated to -ENOMEM here.
+ */
+static int insert_obj(void *obj, int *id)
+{
+	int ret;
+
+	do {
+		if (!idr_pre_get(&usa_idr, GFP_KERNEL))
+			return -ENOMEM;
+
+		mutex_lock(&usa_mutex);
+		ret = idr_get_new(&usa_idr, obj, id);
+		mutex_unlock(&usa_mutex);
+	} while (ret == -EAGAIN);	/* preallocation raced away; retry */
+
+	return ret;
+}
+
+/* Release an id previously allocated with insert_obj(). */
+static void remove_obj(int id)
+{
+ mutex_lock(&usa_mutex);
+ idr_remove(&usa_idr, id);
+ mutex_unlock(&usa_mutex);
+}
+
+/*
+ * Retire an event after it has been copied to userspace.  Called with the
+ * owning file's file_mutex held.
+ *
+ * MAD events are unlinked; one whose response did not fit in resp.data is
+ * parked on data_list for a later GET_DATA, any other is freed.  Multicast
+ * events are embedded in their usa_multicast, so they are only unlinked
+ * and counted.
+ */
+static void finish_event(struct usa_event *event)
+{
+	if (event->resp.event == IB_USA_EVENT_MAD) {
+		list_del(&event->list);
+		if (event->resp.data_len <= IB_USA_EVENT_DATA) {
+			kfree(event);
+			return;
+		}
+		list_add_tail(&event->list, &event->file->data_list);
+	} else if (event->resp.event == IB_USA_EVENT_MCAST) {
+		list_del_init(&event->list);
+		container_of(event, struct usa_multicast,
+			     event)->events_reported++;
+	}
+}
+
+/*
+ * IB_USA_CMD_GET_EVENT: copy the oldest pending event to the user buffer
+ * named by cmd.response.
+ *
+ * Blocks until an event is available unless the file is O_NONBLOCK
+ * (-EAGAIN).  Returns -ENOSPC if the output buffer is smaller than
+ * struct ib_usa_event_resp, -ERESTARTSYS if a signal is pending, -EFAULT
+ * on a bad user pointer, 0 on success.
+ */
+static ssize_t usa_get_event(struct usa_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_usa_get_event cmd;
+ struct usa_event *event;
+ int ret = 0;
+ DEFINE_WAIT(wait);
+
+ if (out_len < sizeof(struct ib_usa_event_resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&file->file_mutex);
+ while (list_empty(&file->event_list)) {
+ if (file->filp->f_flags & O_NONBLOCK) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+
+ /*
+ * Queue on the wait list while still holding file_mutex: events are
+ * added and waiters woken under the same mutex, so no wakeup can
+ * slip in between the list_empty() check and going to sleep.
+ */
+ prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
+ mutex_unlock(&file->file_mutex);
+ schedule();
+ mutex_lock(&file->file_mutex);
+ finish_wait(&file->poll_wait, &wait);
+ }
+
+ if (ret)
+ goto done;
+
+ event = list_entry(file->event_list.next, struct usa_event, list);
+
+ /* On a failed copy the event stays queued and can be fetched again. */
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &event->resp, sizeof(event->resp))) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ finish_event(event);
+done:
+ mutex_unlock(&file->file_mutex);
+ return ret;
+}
+
+/*
+ * Find the event with the given id on the file's data_list and unlink it.
+ * Returns the event, now owned by the caller, or NULL if there is none.
+ */
+static struct usa_event *get_event_data(struct usa_file *file, __u32 id)
+{
+	struct usa_event *cur, *match = NULL;
+
+	mutex_lock(&file->file_mutex);
+	list_for_each_entry(cur, &file->data_list, list) {
+		if (cur->resp.id != id)
+			continue;
+
+		list_del(&cur->list);
+		match = cur;
+		break;
+	}
+	mutex_unlock(&file->file_mutex);
+
+	return match;
+}
+
+/*
+ * Copy every attribute of the event's received SA MAD to the user buffer
+ * at address 'response', packed back to back.  Each attribute occupies
+ * attr_offset * 8 bytes, as given in the SA header.
+ *
+ * Returns 0 on success or -EFAULT if a copy to userspace fails.
+ */
+static int copy_event_data(struct usa_event *event, __u64 response)
+{
+	struct ib_sa_mad *sa_mad;
+	struct ib_sa_iter *it;
+	int rec_len, status = 0;
+	void *rec;
+
+	sa_mad = (struct ib_sa_mad *) event->mad_recv_wc->recv_buf.mad;
+	rec_len = be16_to_cpu(sa_mad->sa_hdr.attr_offset) * 8;
+
+	it = ib_sa_iter_create(event->mad_recv_wc);
+	for (rec = ib_sa_iter_next(it); rec; rec = ib_sa_iter_next(it)) {
+		if (copy_to_user((void __user *) (unsigned long) response,
+				 rec, rec_len)) {
+			status = -EFAULT;
+			break;
+		}
+		response += rec_len;
+	}
+	ib_sa_iter_free(it);
+
+	return status;
+}
+
+/*
+ * IB_USA_CMD_GET_DATA: return the full response of a MAD event whose data
+ * did not fit in the event record.
+ *
+ * Returns -EINVAL if no such event is pending, -ENOSPC if the output
+ * buffer is smaller than the response, -EFAULT on a bad user pointer.
+ * The event and its MAD are released in every case once the event has
+ * been found, including on error.
+ */
+static ssize_t usa_get_data(struct usa_file *file, const char __user *inbuf,
+			    int in_len, int out_len)
+{
+	struct ib_usa_get_data cmd;
+	struct usa_event *event;
+	int ret;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	event = get_event_data(file, cmd.id);
+	if (!event)
+		return -EINVAL;
+
+	if (out_len >= event->resp.data_len)
+		ret = copy_event_data(event, cmd.response);
+	else
+		ret = -ENOSPC;
+
+	ib_free_recv_mad(event->mad_recv_wc);
+	kfree(event);
+	return ret;
+}
+
+/*
+ * Completion callback for SA requests issued by send_mad().
+ *
+ * Records the status and, when a response MAD was received, its length
+ * and up to IB_USA_EVENT_DATA bytes of it.  A response that fits entirely
+ * is released right away; a larger one is kept on the event so that
+ * GET_DATA can return all of it.  The event is then queued and waiters
+ * are woken.
+ */
+static void usa_req_handler(int status, struct ib_mad_recv_wc *mad_recv_wc,
+			    void *context)
+{
+	struct usa_event *event = context;
+	struct usa_file *file = event->file;
+
+	if (mad_recv_wc) {
+		int fits;
+
+		event->resp.data_len = mad_recv_wc->mad_len;
+		fits = event->resp.data_len <= IB_USA_EVENT_DATA;
+
+		memcpy(event->resp.data, mad_recv_wc->recv_buf.mad,
+		       fits ? event->resp.data_len : IB_USA_EVENT_DATA);
+
+		if (fits)
+			ib_free_recv_mad(mad_recv_wc);
+		else
+			event->mad_recv_wc = mad_recv_wc;
+	}
+
+	event->resp.status = status;
+
+	mutex_lock(&file->file_mutex);
+	list_add_tail(&event->list, &file->event_list);
+	wake_up_interruptible(&file->poll_wait);
+	mutex_unlock(&file->file_mutex);
+}
+
+/*
+ * Issue an SA request on behalf of userspace.
+ *
+ * The attribute is copied from the user pointer cmd->attr into a newly
+ * allocated event, which is passed to ib_sa_send_mad() as the completion
+ * context.  On success the event is owned by the request and is queued
+ * for userspace by usa_req_handler().
+ *
+ * Returns 0 on success, -EINVAL for an attribute whose size is unknown or
+ * does not fit the event buffer, -ENODEV if no device matches
+ * node_guid/port_num, -ENOMEM, -EFAULT, or the error from
+ * ib_sa_send_mad().
+ */
+static int send_mad(struct usa_file *file, struct ib_usa_send_mad *cmd)
+{
+	struct usa_device *dev;
+	struct usa_event *event;
+	struct ib_sa_query *query;
+	int attr_size, ret;
+
+	attr_size = ib_sa_attr_size(cmd->attr_id);
+	/*
+	 * The attribute is staged in event->resp.data, so it has to fit
+	 * there; a larger (or negative) size would overrun the event.
+	 */
+	if (attr_size <= 0 || attr_size > IB_USA_EVENT_DATA)
+		return -EINVAL;
+
+	dev = acquire_dev(cmd->node_guid, cmd->port_num);
+	if (!dev)
+		return -ENODEV;
+
+	event = kzalloc(sizeof *event, GFP_KERNEL);
+	if (!event) {
+		ret = -ENOMEM;
+		goto deref;
+	}
+
+	if (copy_from_user(event->resp.data,
+			   (void __user *) (unsigned long) cmd->attr,
+			   attr_size)) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+	event->file = file;
+	event->resp.event = IB_USA_EVENT_MAD;
+	event->resp.uid = cmd->uid;
+
+	/* Event ids are per file; the counter is guarded by file_mutex. */
+	mutex_lock(&file->file_mutex);
+	event->resp.id = file->event_id++;
+	mutex_unlock(&file->file_mutex);
+
+	ret = ib_sa_send_mad(&file->sa_client, dev->device, cmd->port_num,
+			     cmd->method, event->resp.data, cmd->attr_id,
+			     (ib_sa_comp_mask) cmd->comp_mask,
+			     cmd->timeout_ms, cmd->retries, GFP_KERNEL,
+			     usa_req_handler, event, &query);
+	if (ret < 0)
+		goto free;
+
+	deref_dev(dev);
+	return 0;
+free:
+	kfree(event);
+deref:
+	deref_dev(dev);
+	return ret;
+}
+
+/*
+ * IB_USA_CMD_SEND_MAD handler for the raw file: send the request without
+ * restricting the method or attribute.
+ */
+static ssize_t usa_send_mad(struct usa_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_usa_send_mad cmd;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ return send_mad(file, &cmd);
+}
+
+/*
+ * IB_USA_CMD_SEND_MAD handler for the default file: only a fixed set of
+ * queries is allowed.
+ *
+ *   Get / GetTable : PathRecord, MCMemberRecord, ServiceRecord
+ *   GetMulti       : MultiPathRecord
+ *
+ * Anything else is rejected with -EINVAL.
+ */
+static ssize_t usa_query(struct usa_file *file, const char __user *inbuf,
+			 int in_len, int out_len)
+{
+	struct ib_usa_send_mad cmd;
+	uint16_t attr_id;
+	int permitted;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	attr_id = be16_to_cpu(cmd.attr_id);
+
+	switch (cmd.method) {
+	case IB_MGMT_METHOD_GET:
+	case IB_SA_METHOD_GET_TABLE:
+		permitted = attr_id == IB_SA_ATTR_PATH_REC ||
+			    attr_id == IB_SA_ATTR_MC_MEMBER_REC ||
+			    attr_id == IB_SA_ATTR_SERVICE_REC;
+		break;
+	case IB_SA_METHOD_GET_MULTI:
+		permitted = attr_id == IB_SA_ATTR_MULTI_PATH_REC;
+		break;
+	default:
+		permitted = 0;
+		break;
+	}
+
+	if (!permitted)
+		return -EINVAL;
+
+	return send_mad(file, &cmd);
+}
+
+/*
+ * Multicast join callback.
+ *
+ * A single membership can produce up to two events; the second one only
+ * happens when an established membership hits an error.  The event is
+ * embedded in the usa_multicast, so a newer report replaces an older one
+ * that has not been read yet and only the last event is seen.
+ */
+static int multicast_handler(int status, struct ib_multicast *multicast)
+{
+	struct usa_multicast *mcast = multicast->context;
+	struct usa_event *event = &mcast->event;
+	struct usa_file *file = event->file;
+
+	if (status == 0) {
+		event->resp.data_len = IB_SA_ATTR_MC_MEMBER_REC_LEN;
+		ib_sa_pack_attr(event->resp.data, &multicast->rec,
+				IB_SA_ATTR_MC_MEMBER_REC);
+	}
+
+	mutex_lock(&file->file_mutex);
+	event->resp.status = status;
+
+	/* Unlink (a no-op for an idle event) and queue at the tail. */
+	list_move_tail(&event->list, &file->event_list);
+	wake_up_interruptible(&file->poll_wait);
+	mutex_unlock(&file->file_mutex);
+	return 0;
+}
+
+/*
+ * IB_USA_CMD_JOIN_MCAST: start joining a multicast group.
+ *
+ * Allocates the tracking object, assigns it an id, reports the id to
+ * userspace through cmd.response (struct ib_usa_id_resp) and then starts
+ * the join.  Join completion is reported later as an IB_USA_EVENT_MCAST
+ * event.  The id is what IB_USA_CMD_FREE_ID takes to leave the group.
+ *
+ * Returns 0 on success, -ENOSPC if the output buffer cannot hold the id
+ * response, -ENODEV if no device matches, -ENOMEM, -EFAULT, or the error
+ * from ib_join_multicast().
+ */
+static ssize_t usa_join_mcast(struct usa_file *file, const char __user *inbuf,
+			      int in_len, int out_len)
+{
+	struct usa_device *dev;
+	struct usa_multicast *mcast;
+	struct ib_usa_join_mcast cmd;
+	struct ib_usa_id_resp resp;
+	struct ib_sa_mcmember_rec rec;
+	int ret;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	dev = acquire_dev(cmd.node_guid, cmd.port_num);
+	if (!dev)
+		return -ENODEV;
+
+	mcast = kzalloc(sizeof *mcast, GFP_KERNEL);
+	if (!mcast) {
+		ret = -ENOMEM;
+		goto deref;
+	}
+	/* Self-linked so the event can be unlinked before it is ever queued. */
+	INIT_LIST_HEAD(&mcast->event.list);
+	mcast->event.file = file;
+	mcast->event.resp.event = IB_USA_EVENT_MCAST;
+	mcast->event.resp.uid = cmd.uid;
+
+	ret = insert_obj(mcast, &mcast->event.resp.id);
+	if (ret)
+		goto free;
+
+	mutex_lock(&file->file_mutex);
+	list_add_tail(&mcast->list, &file->mcast_list);
+	mutex_unlock(&file->file_mutex);
+
+	if (copy_from_user(mcast->event.resp.data,
+			   (void __user *) (unsigned long) cmd.mcmember_rec,
+			   IB_SA_ATTR_MC_MEMBER_REC_LEN)) {
+		ret = -EFAULT;
+		goto remove;
+	}
+
+	/*
+	 * Hand the id back before the join is started, so that a failure
+	 * here can still be unwound without having to leave the group.
+	 */
+	resp.id = mcast->event.resp.id;
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto remove;
+	}
+
+	ib_sa_unpack_attr(&rec, mcast->event.resp.data,
+			  IB_SA_ATTR_MC_MEMBER_REC);
+	mcast->multicast = ib_join_multicast(dev->device, cmd.port_num, &rec,
+					     (ib_sa_comp_mask) cmd.comp_mask,
+					     GFP_KERNEL, multicast_handler,
+					     mcast);
+	if (IS_ERR(mcast->multicast)) {
+		ret = PTR_ERR(mcast->multicast);
+		goto remove;
+	}
+
+	deref_dev(dev);
+	return 0;
+remove:
+	mutex_lock(&file->file_mutex);
+	list_del(&mcast->list);
+	mutex_unlock(&file->file_mutex);
+	remove_obj(mcast->event.resp.id);
+free:
+	kfree(mcast);
+deref:
+	deref_dev(dev);
+	return ret;
+}
+
+/*
+ * IB_USA_CMD_FREE_ID: leave a multicast group and release its id.
+ *
+ * Returns -ENOENT if the id is unknown, -EINVAL if it belongs to another
+ * file, -EFAULT if the response cannot be written (the id is released
+ * regardless), 0 on success.  The response reports how many events for
+ * the id were delivered to userspace.
+ */
+static ssize_t usa_free_id(struct usa_file *file, const char __user *inbuf,
+			   int in_len, int out_len)
+{
+	struct ib_usa_free_id cmd;
+	struct ib_usa_free_resp resp;
+	struct usa_multicast *mcast;
+	int ret = 0;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	/* Look up and remove atomically so only one caller can free the id. */
+	mutex_lock(&usa_mutex);
+	mcast = idr_find(&usa_idr, cmd.id);
+	if (!mcast)
+		mcast = ERR_PTR(-ENOENT);
+	else if (mcast->event.file != file)
+		mcast = ERR_PTR(-EINVAL);
+	else
+		idr_remove(&usa_idr, mcast->event.resp.id);
+	mutex_unlock(&usa_mutex);
+
+	if (IS_ERR(mcast))
+		return PTR_ERR(mcast);
+
+	ib_free_multicast(mcast->multicast);
+
+	mutex_lock(&file->file_mutex);
+	list_del(&mcast->list);
+	/*
+	 * The embedded event may still be queued on event_list, waiting to
+	 * be read.  Unlink it before freeing, otherwise the list would keep
+	 * pointing into freed memory.  The entry is self-linked whenever it
+	 * is not queued, so the unlink is always valid.
+	 */
+	list_del(&mcast->event.list);
+	mutex_unlock(&file->file_mutex);
+
+	resp.events_reported = mcast->events_reported;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+	kfree(mcast);
+	return ret;
+}
+
+/*
+ * IB_USA_CMD_GET_MCAST: look up the MCMemberRecord for an MGID on the
+ * given device/port and copy it, in packed form, to cmd.response.
+ *
+ * Returns -ENOSPC if the output buffer cannot hold a packed record
+ * (IB_SA_ATTR_MC_MEMBER_REC_LEN bytes), -ENODEV if no device matches,
+ * -EFAULT on a bad user pointer, or the error from ib_get_mcmember_rec().
+ */
+static ssize_t usa_get_mcast(struct usa_file *file, const char __user *inbuf,
+			     int in_len, int out_len)
+{
+	struct usa_device *dev;
+	struct ib_usa_get_mcast cmd;
+	struct ib_sa_mcmember_rec rec;
+	u8 mcmember_rec[IB_SA_ATTR_MC_MEMBER_REC_LEN];
+	int ret;
+
+	/* Compare against the record length itself, not sizeof the constant. */
+	if (out_len < IB_SA_ATTR_MC_MEMBER_REC_LEN)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	dev = acquire_dev(cmd.node_guid, cmd.port_num);
+	if (!dev)
+		return -ENODEV;
+
+	ret = ib_get_mcmember_rec(dev->device, cmd.port_num,
+				  (union ib_gid *) cmd.mgid, &rec);
+	if (!ret) {
+		ib_sa_pack_attr(mcmember_rec, &rec, IB_SA_ATTR_MC_MEMBER_REC);
+		if (copy_to_user((void __user *) (unsigned long) cmd.response,
+				 mcmember_rec, IB_SA_ATTR_MC_MEMBER_REC_LEN))
+			ret = -EFAULT;
+	}
+
+	deref_dev(dev);
+	return ret;
+}
+
+/*
+ * Command dispatch table, indexed by ib_usa_cmd_hdr.cmd.  The SEND_MAD
+ * slot holds the restricted handler; usa_raw_write() substitutes
+ * usa_send_mad() for that command.
+ */
+static ssize_t (*usa_cmd_table[])(struct usa_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len) = {
+ [IB_USA_CMD_SEND_MAD] = usa_query, /* Limited queries by default */
+ [IB_USA_CMD_GET_EVENT] = usa_get_event,
+ [IB_USA_CMD_GET_DATA] = usa_get_data,
+ [IB_USA_CMD_JOIN_MCAST] = usa_join_mcast,
+ [IB_USA_CMD_FREE_ID] = usa_free_id,
+ [IB_USA_CMD_GET_MCAST] = usa_get_mcast,
+};
+
+/*
+ * write() handler for the ib_usa_raw file.  Parses the command header and
+ * dispatches through usa_cmd_table, except that SEND_MAD goes to the
+ * unrestricted usa_send_mad().
+ *
+ * Returns len on success, -EINVAL for a short write, an unknown command
+ * or an input length larger than what was written, -EFAULT on a bad user
+ * pointer, or the handler's error.
+ */
+static ssize_t usa_raw_write(struct file *filp, const char __user *buf,
+			     size_t len, loff_t *pos)
+{
+	ssize_t (*handler)(struct usa_file *file, const char __user *inbuf,
+			   int in_len, int out_len);
+	struct ib_usa_cmd_hdr hdr;
+	ssize_t ret;
+
+	if (len < sizeof(hdr))
+		return -EINVAL;
+	if (copy_from_user(&hdr, buf, sizeof(hdr)))
+		return -EFAULT;
+	/* cmd is unsigned, so only the upper bound needs checking. */
+	if (hdr.cmd >= ARRAY_SIZE(usa_cmd_table))
+		return -EINVAL;
+	if (hdr.in + sizeof(hdr) > len)
+		return -EINVAL;
+
+	handler = usa_cmd_table[hdr.cmd];
+	if (hdr.cmd == IB_USA_CMD_SEND_MAD)
+		handler = usa_send_mad;
+
+	ret = handler(filp->private_data, buf + sizeof(hdr), hdr.in, hdr.out);
+	return ret ? ret : (ssize_t) len;
+}
+
+/*
+ * write() handler for the ib_usa_default file.  Parses the command header
+ * and dispatches through usa_cmd_table, so SEND_MAD is limited to the
+ * queries accepted by usa_query().
+ *
+ * Returns len on success, -EINVAL for a short write, an unknown command
+ * or an input length larger than what was written, -EFAULT on a bad user
+ * pointer, or the handler's error.
+ */
+static ssize_t usa_default_write(struct file *filp, const char __user *buf,
+				 size_t len, loff_t *pos)
+{
+	struct ib_usa_cmd_hdr hdr;
+	const char __user *payload = buf + sizeof(hdr);
+	ssize_t ret;
+
+	if (len < sizeof(hdr))
+		return -EINVAL;
+	if (copy_from_user(&hdr, buf, sizeof(hdr)))
+		return -EFAULT;
+	/* cmd is unsigned, so only the upper bound needs checking. */
+	if (hdr.cmd >= ARRAY_SIZE(usa_cmd_table))
+		return -EINVAL;
+	if (hdr.in + sizeof(hdr) > len)
+		return -EINVAL;
+
+	ret = usa_cmd_table[hdr.cmd](filp->private_data, payload,
+				     hdr.in, hdr.out);
+	return ret ? ret : (ssize_t) len;
+}
+
+/* poll() handler: the file is readable whenever an event is queued. */
+static unsigned int usa_poll(struct file *filp, struct poll_table_struct *wait)
+{
+	struct usa_file *file = filp->private_data;
+
+	poll_wait(filp, &file->poll_wait, wait);
+
+	if (list_empty(&file->event_list))
+		return 0;
+
+	return POLLIN | POLLRDNORM;
+}
+
+/*
+ * open() handler shared by both device files: allocate and initialize the
+ * per-file state and register it as an SA client.
+ *
+ * The structure is zero-allocated so that event_id, the counter used for
+ * MAD event ids, starts from a defined value instead of whatever the
+ * allocator returned.
+ */
+static int usa_open(struct inode *inode, struct file *filp)
+{
+	struct usa_file *file;
+
+	file = kzalloc(sizeof *file, GFP_KERNEL);
+	if (!file)
+		return -ENOMEM;
+
+	ib_sa_register_client(&file->sa_client);
+
+	INIT_LIST_HEAD(&file->event_list);
+	INIT_LIST_HEAD(&file->data_list);
+	INIT_LIST_HEAD(&file->mcast_list);
+	init_waitqueue_head(&file->poll_wait);
+	mutex_init(&file->file_mutex);
+
+	filp->private_data = file;
+	file->filp = filp;
+	return 0;
+}
+
+/*
+ * Free every event on the given list, releasing any received MAD that is
+ * still attached to an event.
+ */
+static void cleanup_events(struct list_head *list)
+{
+	struct usa_event *cur, *next;
+
+	list_for_each_entry_safe(cur, next, list, list) {
+		list_del(&cur->list);
+
+		if (cur->mad_recv_wc)
+			ib_free_recv_mad(cur->mad_recv_wc);
+
+		kfree(cur);
+	}
+}
+
+/*
+ * Leave every multicast group still joined through this file and free the
+ * tracking objects.
+ */
+static void cleanup_mcast(struct usa_file *file)
+{
+	struct usa_multicast *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &file->mcast_list, list) {
+		list_del(&cur->list);
+
+		remove_obj(cur->event.resp.id);
+
+		ib_free_multicast(cur->multicast);
+
+		/*
+		 * Joins that have not been freed yet can still queue events,
+		 * so the event list must be modified under the file mutex.
+		 */
+		mutex_lock(&file->file_mutex);
+		list_del(&cur->event.list);
+		mutex_unlock(&file->file_mutex);
+
+		kfree(cur);
+	}
+}
+
+/*
+ * release() handler: tear down everything owned by the file.  The SA
+ * client is unregistered and the multicast joins are freed first; only
+ * then are the remaining queued events discarded.
+ */
+static int usa_close(struct inode *inode, struct file *filp)
+{
+ struct usa_file *file = filp->private_data;
+
+ ib_sa_unregister_client(&file->sa_client);
+ cleanup_mcast(file);
+
+ /* Multicast events are embedded and already unlinked by cleanup_mcast(). */
+ cleanup_events(&file->event_list);
+ cleanup_events(&file->data_list);
+ kfree(file);
+ return 0;
+}
+
+/*
+ * IB client "add" callback: start tracking a newly registered device.
+ * Only devices using the IB transport are tracked.  A switch exposes just
+ * port 0; any other node exposes ports 1..phys_port_cnt.
+ */
+static void usa_add_one(struct ib_device *device)
+{
+	struct usa_device *dev;
+	int is_switch = device->node_type == RDMA_NODE_IB_SWITCH;
+
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	dev = kmalloc(sizeof *dev, GFP_KERNEL);
+	if (!dev)
+		return;
+
+	dev->device = device;
+	dev->start_port = is_switch ? 0 : 1;
+	dev->end_port = is_switch ? 0 : device->phys_port_cnt;
+
+	init_completion(&dev->comp);
+	/* Initial reference, dropped by usa_remove_one(). */
+	atomic_set(&dev->refcount, 1);
+	ib_set_client_data(device, &usa_client, dev);
+
+	mutex_lock(&usa_mutex);
+	list_add_tail(&dev->list, &dev_list);
+	mutex_unlock(&usa_mutex);
+}
+
+/*
+ * IB client "remove" callback: stop tracking a device and free its state
+ * once nobody holds a reference to it any more.
+ */
+static void usa_remove_one(struct ib_device *device)
+{
+ struct usa_device *dev;
+
+ dev = ib_get_client_data(device, &usa_client);
+ if (!dev)
+ return;
+
+ /* Unlink first so that acquire_dev() can no longer find the device. */
+ mutex_lock(&usa_mutex);
+ list_del(&dev->list);
+ mutex_unlock(&usa_mutex);
+
+ /* Drop the initial reference, then wait for the last user to finish. */
+ deref_dev(dev);
+ wait_for_completion(&dev->comp);
+ kfree(dev);
+}
+
+/* File operations for ib_usa_raw: SEND_MAD is not restricted. */
+static struct file_operations usa_raw_fops = {
+ .owner = THIS_MODULE,
+ .open = usa_open,
+ .release = usa_close,
+ .write = usa_raw_write,
+ .poll = usa_poll,
+};
+
+/* Misc device exposing the unrestricted interface as "ib_usa_raw". */
+static struct miscdevice usa_raw_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "ib_usa_raw",
+ .fops = &usa_raw_fops,
+};
+
+/* File operations for ib_usa_default: SEND_MAD is limited by usa_query(). */
+static struct file_operations usa_default_fops = {
+ .owner = THIS_MODULE,
+ .open = usa_open,
+ .release = usa_close,
+ .write = usa_default_write,
+ .poll = usa_poll,
+};
+
+/* Misc device exposing the restricted interface as "ib_usa_default". */
+static struct miscdevice usa_default_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "ib_usa_default",
+ .fops = &usa_default_fops,
+};
+
+/* sysfs "abi_version" attribute: reports IB_USA_ABI_VERSION, read-only. */
+static ssize_t show_abi_version(struct class_device *class_dev, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USA_ABI_VERSION);
+}
+static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+/*
+ * Module init: register both misc devices, publish the ABI version
+ * attribute on the default device and register as an IB client.  Any
+ * failure undoes the steps already completed, in reverse order.
+ */
+static int __init usa_init(void)
+{
+	int ret;
+
+	ret = misc_register(&usa_raw_misc);
+	if (ret)
+		return ret;
+
+	ret = misc_register(&usa_default_misc);
+	if (ret)
+		goto unreg_raw;
+
+	ret = class_device_create_file(usa_default_misc.class,
+				       &class_device_attr_abi_version);
+	if (ret)
+		goto unreg_default;
+
+	ret = ib_register_client(&usa_client);
+	if (!ret)
+		return 0;
+
+	class_device_remove_file(usa_default_misc.class,
+				 &class_device_attr_abi_version);
+unreg_default:
+	misc_deregister(&usa_default_misc);
+unreg_raw:
+	misc_deregister(&usa_raw_misc);
+	return ret;
+}
+
+/* Module exit: undo usa_init() in reverse order, then release the idr. */
+static void __exit usa_cleanup(void)
+{
+ ib_unregister_client(&usa_client);
+ class_device_remove_file(usa_default_misc.class,
+ &class_device_attr_abi_version);
+ misc_deregister(&usa_default_misc);
+ misc_deregister(&usa_raw_misc);
+ idr_destroy(&usa_idr);
+}
+
+module_init(usa_init);
+module_exit(usa_cleanup);
Index: Kconfig
===================================================================
--- Kconfig (revision 9096)
+++ Kconfig (working copy)
@@ -17,15 +17,15 @@ config INFINIBAND_USER_MAD
need libibumad from <http://www.openib.org>.
config INFINIBAND_USER_ACCESS
- tristate "InfiniBand userspace access (verbs and CM)"
+ tristate "InfiniBand userspace access (verbs, CM, SA client)"
depends on INFINIBAND
---help---
Userspace InfiniBand access support. This enables the
- kernel side of userspace verbs and the userspace
- communication manager (CM). This allows userspace processes
- to set up connections and directly access InfiniBand
+ kernel side of userspace verbs, the userspace communication
+ manager (CM), and userspace SA client. This allows userspace
+ processes to set up connections and directly access InfiniBand
hardware for fast-path operations. You will also need
- libibverbs, libibcm and a hardware driver library from
+ libibverbs, libibcm, libibsa, and a hardware driver library from
<http://www.openib.org>.
config INFINIBAND_ADDR_TRANS
Index: core/Makefile
===================================================================
--- core/Makefile (revision 9096)
+++ core/Makefile (working copy)
@@ -7,7 +7,8 @@ obj-$(CONFIG_INFINIBAND) += ib_core.o i
ib_sa.o $(infiniband-y) \
findex.o ib_multicast.o
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
-obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o $(user_access-y)
+obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o ib_usa.o \
+ $(user_access-y)
findex-y := index.o
@@ -39,3 +40,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cm
ib_ucm-y := ucm.o
+ib_usa-y := usa.o
+
More information about the general
mailing list