[ofa-general] [PATCH][RFC] P_Key support for umad
Roland Dreier
rdreier at cisco.com
Thu Sep 6 23:42:54 PDT 2007
Here is a long overdue patch to enable userspace to control the P_Key
index used for userspace MADs. I used the approach we discussed when
this first came up, namely adding an ioctl to enable to the new
interface so that existing binaries don't break.
I haven't had a chance to make all the userspace library changes to
test the new interface and I likely won't until I return home (I
should be done traveling for a few months after this week). I have
tested existing code against a kernel with this patch applied and it
seems to be OK, and I wanted to at least get this out for review as
soon as I had it.
Please review/test. I would like to get this into 2.6.24 if possible
since we've known so long that we needed it.
Thanks,
Roland
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
index 8ec54b9..a3450aa 100644
--- a/Documentation/infiniband/user_mad.txt
+++ b/Documentation/infiniband/user_mad.txt
@@ -99,6 +99,20 @@ Transaction IDs
request/response pairs. The upper 32 bits are reserved for use by
the kernel and will be overwritten before a MAD is sent.
+P_Key Index Handling
+
+ The old ib_umad interface did not allow setting the P_Key index for
+ MADs that are sent and did not provide a way for obtaining the P_Key
+ index of received MADs. A new layout for struct ib_user_mad_hdr
+ with a pkey_index member has been defined; however, to preserve
+ binary compatibility with older applications, this new layout will
+ not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
+ before a file description is used for anything else.
+
+ In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
+ to 6, the new layout of struct ib_user_mad_hdr will be used by
+ default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
+
Setting IsSM Capability Bit
To set the IsSM capability bit for a port, simply open the
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index d97ded2..3a0e579 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -118,6 +118,8 @@ struct ib_umad_file {
wait_queue_head_t recv_wait;
struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
int agents_dead;
+ u8 use_pkey_index;
+ u8 already_used;
};
struct ib_umad_packet {
@@ -147,6 +149,12 @@ static void ib_umad_release_dev(struct kref *ref)
kfree(dev);
}
+static int hdr_size(struct ib_umad_file *file)
+{
+ return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
+ sizeof (struct ib_user_mad_hdr_old);
+}
+
/* caller must hold port->mutex at least for reading */
static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
{
@@ -221,13 +229,13 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->length = mad_recv_wc->mad_len;
packet->recv_wc = mad_recv_wc;
- packet->mad.hdr.status = 0;
- packet->mad.hdr.length = sizeof (struct ib_user_mad) +
- mad_recv_wc->mad_len;
- packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
- packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
- packet->mad.hdr.sl = mad_recv_wc->wc->sl;
- packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
+ packet->mad.hdr.status = 0;
+ packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len;
+ packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
+ packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
+ packet->mad.hdr.sl = mad_recv_wc->wc->sl;
+ packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
+ packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
if (packet->mad.hdr.grh_present) {
struct ib_ah_attr ah_attr;
@@ -253,8 +261,8 @@ err1:
ib_free_recv_mad(mad_recv_wc);
}
-static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
- size_t count)
+static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf,
+ struct ib_umad_packet *packet, size_t count)
{
struct ib_mad_recv_buf *recv_buf;
int left, seg_payload, offset, max_seg_payload;
@@ -262,15 +270,15 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
/* We need enough room to copy the first (or only) MAD segment. */
recv_buf = &packet->recv_wc->recv_buf;
if ((packet->length <= sizeof (*recv_buf->mad) &&
- count < sizeof (packet->mad) + packet->length) ||
+ count < hdr_size(file) + packet->length) ||
(packet->length > sizeof (*recv_buf->mad) &&
- count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
+ count < hdr_size(file) + sizeof (*recv_buf->mad)))
return -EINVAL;
- if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
+ if (copy_to_user(buf, &packet->mad, hdr_size(file)))
return -EFAULT;
- buf += sizeof (packet->mad);
+ buf += hdr_size(file);
seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
if (copy_to_user(buf, recv_buf->mad, seg_payload))
return -EFAULT;
@@ -280,7 +288,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
* Multipacket RMPP MAD message. Copy remainder of message.
* Note that last segment may have a shorter payload.
*/
- if (count < sizeof (packet->mad) + packet->length) {
+ if (count < hdr_size(file) + packet->length) {
/*
* The buffer is too small, return the first RMPP segment,
* which includes the RMPP message length.
@@ -300,18 +308,23 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
return -EFAULT;
}
}
- return sizeof (packet->mad) + packet->length;
+ return hdr_size(file) + packet->length;
}
-static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
- size_t count)
+static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf,
+ struct ib_umad_packet *packet, size_t count)
{
- ssize_t size = sizeof (packet->mad) + packet->length;
+ ssize_t size = hdr_size(file) + packet->length;
if (count < size)
return -EINVAL;
- if (copy_to_user(buf, &packet->mad, size))
+ if (copy_to_user(buf, &packet->mad, hdr_size(file)))
+ return -EFAULT;
+
+ buf += hdr_size(file);
+
+ if (copy_to_user(buf, packet->mad.data, packet->length))
return -EFAULT;
return size;
@@ -324,7 +337,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
struct ib_umad_packet *packet;
ssize_t ret;
- if (count < sizeof (struct ib_user_mad))
+ if (count < hdr_size(file))
return -EINVAL;
spin_lock_irq(&file->recv_lock);
@@ -348,9 +361,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
spin_unlock_irq(&file->recv_lock);
if (packet->recv_wc)
- ret = copy_recv_mad(buf, packet, count);
+ ret = copy_recv_mad(file, buf, packet, count);
else
- ret = copy_send_mad(buf, packet, count);
+ ret = copy_send_mad(file, buf, packet, count);
if (ret < 0) {
/* Requeue packet */
@@ -442,15 +455,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
__be64 *tid;
int ret, data_len, hdr_len, copy_offset, rmpp_active;
- if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
+ if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
return -EINVAL;
packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
if (!packet)
return -ENOMEM;
- if (copy_from_user(&packet->mad, buf,
- sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
+ if (copy_from_user(&packet->mad, buf, hdr_size(file))) {
ret = -EFAULT;
goto err;
}
@@ -461,6 +473,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err;
}
+ buf += hdr_size(file);
+
+ if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) {
+ ret = -EFAULT;
+ goto err;
+ }
+
down_read(&file->port->mutex);
agent = __get_agent(file, packet->mad.hdr.id);
@@ -500,11 +519,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
IB_MGMT_RMPP_FLAG_ACTIVE;
}
- data_len = count - sizeof (struct ib_user_mad) - hdr_len;
+ data_len = count - hdr_size(file) - hdr_len;
packet->msg = ib_create_send_mad(agent,
be32_to_cpu(packet->mad.hdr.qpn),
- 0, rmpp_active, hdr_len,
- data_len, GFP_KERNEL);
+ packet->mad.hdr.pkey_index, rmpp_active,
+ hdr_len, data_len, GFP_KERNEL);
if (IS_ERR(packet->msg)) {
ret = PTR_ERR(packet->msg);
goto err_ah;
@@ -517,7 +536,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
/* Copy MAD header. Any RMPP header is already in place. */
memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
- buf += sizeof (struct ib_user_mad);
if (!rmpp_active) {
if (copy_from_user(packet->msg->mad + copy_offset,
@@ -646,6 +664,7 @@ found:
goto out;
}
+ file->already_used = 1;
file->agent[agent_id] = agent;
ret = 0;
@@ -682,6 +701,20 @@ out:
return ret;
}
+static long ib_umad_enable_pkey(struct ib_umad_file *file)
+{
+ int ret = 0;
+
+ down_write(&file->port->mutex);
+ if (file->already_used)
+ ret = -EINVAL;
+ else
+ file->use_pkey_index = 1;
+ up_write(&file->port->mutex);
+
+ return ret;
+}
+
static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
@@ -690,6 +723,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
return ib_umad_reg_agent(filp->private_data, arg);
case IB_USER_MAD_UNREGISTER_AGENT:
return ib_umad_unreg_agent(filp->private_data, arg);
+ case IB_USER_MAD_ENABLE_PKEY:
+ return ib_umad_enable_pkey(filp->private_data);
default:
return -ENOIOCTLCMD;
}
diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h
index d66b15e..2a32043 100644
--- a/include/rdma/ib_user_mad.h
+++ b/include/rdma/ib_user_mad.h
@@ -52,7 +52,50 @@
*/
/**
+ * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index
+ * @id - ID of agent MAD received with/to be sent with
+ * @status - 0 on successful receive, ETIMEDOUT if no response
+ * received (transaction ID in data[] will be set to TID of original
+ * request) (ignored on send)
+ * @timeout_ms - Milliseconds to wait for response (unset on receive)
+ * @retries - Number of automatic retries to attempt
+ * @qpn - Remote QP number received from/to be sent to
+ * @qkey - Remote Q_Key to be sent with (unset on receive)
+ * @lid - Remote lid received from/to be sent to
+ * @sl - Service level received with/to be sent with
+ * @path_bits - Local path bits received with/to be sent with
+ * @grh_present - If set, GRH was received/should be sent
+ * @gid_index - Local GID index to send with (unset on receive)
+ * @hop_limit - Hop limit in GRH
+ * @traffic_class - Traffic class in GRH
+ * @gid - Remote GID in GRH
+ * @flow_label - Flow label in GRH
+ */
+struct ib_user_mad_hdr_old {
+ __u32 id;
+ __u32 status;
+ __u32 timeout_ms;
+ __u32 retries;
+ __u32 length;
+ __be32 qpn;
+ __be32 qkey;
+ __be16 lid;
+ __u8 sl;
+ __u8 path_bits;
+ __u8 grh_present;
+ __u8 gid_index;
+ __u8 hop_limit;
+ __u8 traffic_class;
+ __u8 gid[16];
+ __be32 flow_label;
+};
+
+/**
* ib_user_mad_hdr - MAD packet header
+ * This layout allows specifying/receiving the P_Key index. To use
+ * this capability, an application must call the
+ * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before
+ * any other actions with the file handle.
* @id - ID of agent MAD received with/to be sent with
* @status - 0 on successful receive, ETIMEDOUT if no response
* received (transaction ID in data[] will be set to TID of original
@@ -70,6 +113,7 @@
* @traffic_class - Traffic class in GRH
* @gid - Remote GID in GRH
* @flow_label - Flow label in GRH
+ * @pkey_index - P_Key index
*/
struct ib_user_mad_hdr {
__u32 id;
@@ -88,6 +132,8 @@ struct ib_user_mad_hdr {
__u8 traffic_class;
__u8 gid[16];
__be32 flow_label;
+ __u16 pkey_index;
+ __u8 reserved[6];
};
/**
@@ -134,4 +180,6 @@ struct ib_user_mad_reg_req {
#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32)
+#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3)
+
#endif /* IB_USER_MAD_H */
More information about the general
mailing list