[ofa-general] [PATCH][RFC] P_Key support for umad
Hal Rosenstock
hal.rosenstock at gmail.com
Tue Sep 11 08:50:36 PDT 2007
On 9/10/07, Hal Rosenstock <hal.rosenstock at gmail.com> wrote:
> On 9/7/07, Roland Dreier <rdreier at cisco.com> wrote:
> > Here is a long overdue patch to enable userspace to control the P_Key
> > index used for userspace MADs. I used the approach we discussed when
> > this first came up, namely adding an ioctl to enable the new
> > interface so that existing binaries don't break.
> >
> > I haven't had a chance to make all the userspace library changes to
> > test the new interface and I likely won't until I return home (I
> > should be done traveling for a few months after this week). I have
> > tested existing code against a kernel with this patch applied and it
> > seems to be OK, and I wanted to at least get this out for review as
> > soon as I had it.
> >
> > Please review/test. I would like to get this into 2.6.24 if possible
> > since we've known so long that we needed it.
>
> Thanks for doing this :-) One nit below in the doc.
>
> I spent some time testing it today in old mode and although my
> environment is limited, I did have trouble with an RMPP test as
> follows:
>
> Can someone try the following with OpenSM running:
>
> First, osmtest -f c
> and then
> osmtest -f a
>
> All on same node with new user_mad module.
>
> That seems to hang rather than complete for me. I didn't have time
> to track this down any further.
With clearer eyes this morning, I was able to see what my problem was.
This test now is working. So although I am unable to review the packet
contents on the wire, I am reasonably confident that hasn't changed
although I would feel better knowing someone explicitly did this.
Bottom line is this seems to work in old mode for me.
Sasha,
Will you be testing this?
-- Hal
> -- Hal
>
> > Thanks,
> > Roland
> >
> >
> > diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
> > index 8ec54b9..a3450aa 100644
> > --- a/Documentation/infiniband/user_mad.txt
> > +++ b/Documentation/infiniband/user_mad.txt
> > @@ -99,6 +99,20 @@ Transaction IDs
> > request/response pairs. The upper 32 bits are reserved for use by
> > the kernel and will be overwritten before a MAD is sent.
> >
> > +P_Key Index Handling
> > +
> > + The old ib_umad interface did not allow setting the P_Key index for
> > + MADs that are sent and did not provide a way for obtaining the P_Key
> > + index of received MADs. A new layout for struct ib_user_mad_hdr
> > + with a pkey_index member has been defined; however, to preserve
> > + binary compatibility with older applications, this new layout will
> > + not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
> > + before a file description is used for anything else.
>
> Nit: Should this be "file descriptor" ?
>
> > +
> > + In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
> > + to 6, the new layout of struct ib_user_mad_hdr will be used by
> > + default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
> > +
> > Setting IsSM Capability Bit
> >
> > To set the IsSM capability bit for a port, simply open the
> > diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
> > index d97ded2..3a0e579 100644
> > --- a/drivers/infiniband/core/user_mad.c
> > +++ b/drivers/infiniband/core/user_mad.c
> > @@ -118,6 +118,8 @@ struct ib_umad_file {
> > wait_queue_head_t recv_wait;
> > struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
> > int agents_dead;
> > + u8 use_pkey_index;
> > + u8 already_used;
> > };
> >
> > struct ib_umad_packet {
> > @@ -147,6 +149,12 @@ static void ib_umad_release_dev(struct kref *ref)
> > kfree(dev);
> > }
> >
> > +static int hdr_size(struct ib_umad_file *file)
> > +{
> > + return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
> > + sizeof (struct ib_user_mad_hdr_old);
> > +}
> > +
> > /* caller must hold port->mutex at least for reading */
> > static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
> > {
> > @@ -221,13 +229,13 @@ static void recv_handler(struct ib_mad_agent *agent,
> > packet->length = mad_recv_wc->mad_len;
> > packet->recv_wc = mad_recv_wc;
> >
> > - packet->mad.hdr.status = 0;
> > - packet->mad.hdr.length = sizeof (struct ib_user_mad) +
> > - mad_recv_wc->mad_len;
> > - packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
> > - packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
> > - packet->mad.hdr.sl = mad_recv_wc->wc->sl;
> > - packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
> > + packet->mad.hdr.status = 0;
> > + packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len;
> > + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
> > + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
> > + packet->mad.hdr.sl = mad_recv_wc->wc->sl;
> > + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
> > + packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
> > packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
> > if (packet->mad.hdr.grh_present) {
> > struct ib_ah_attr ah_attr;
> > @@ -253,8 +261,8 @@ err1:
> > ib_free_recv_mad(mad_recv_wc);
> > }
> >
> > -static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
> > - size_t count)
> > +static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf,
> > + struct ib_umad_packet *packet, size_t count)
> > {
> > struct ib_mad_recv_buf *recv_buf;
> > int left, seg_payload, offset, max_seg_payload;
> > @@ -262,15 +270,15 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
> > /* We need enough room to copy the first (or only) MAD segment. */
> > recv_buf = &packet->recv_wc->recv_buf;
> > if ((packet->length <= sizeof (*recv_buf->mad) &&
> > - count < sizeof (packet->mad) + packet->length) ||
> > + count < hdr_size(file) + packet->length) ||
> > (packet->length > sizeof (*recv_buf->mad) &&
> > - count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
> > + count < hdr_size(file) + sizeof (*recv_buf->mad)))
> > return -EINVAL;
> >
> > - if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
> > + if (copy_to_user(buf, &packet->mad, hdr_size(file)))
> > return -EFAULT;
> >
> > - buf += sizeof (packet->mad);
> > + buf += hdr_size(file);
> > seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
> > if (copy_to_user(buf, recv_buf->mad, seg_payload))
> > return -EFAULT;
> > @@ -280,7 +288,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
> > * Multipacket RMPP MAD message. Copy remainder of message.
> > * Note that last segment may have a shorter payload.
> > */
> > - if (count < sizeof (packet->mad) + packet->length) {
> > + if (count < hdr_size(file) + packet->length) {
> > /*
> > * The buffer is too small, return the first RMPP segment,
> > * which includes the RMPP message length.
> > @@ -300,18 +308,23 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
> > return -EFAULT;
> > }
> > }
> > - return sizeof (packet->mad) + packet->length;
> > + return hdr_size(file) + packet->length;
> > }
> >
> > -static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
> > - size_t count)
> > +static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf,
> > + struct ib_umad_packet *packet, size_t count)
> > {
> > - ssize_t size = sizeof (packet->mad) + packet->length;
> > + ssize_t size = hdr_size(file) + packet->length;
> >
> > if (count < size)
> > return -EINVAL;
> >
> > - if (copy_to_user(buf, &packet->mad, size))
> > + if (copy_to_user(buf, &packet->mad, hdr_size(file)))
> > + return -EFAULT;
> > +
> > + buf += hdr_size(file);
> > +
> > + if (copy_to_user(buf, packet->mad.data, packet->length))
> > return -EFAULT;
> >
> > return size;
> > @@ -324,7 +337,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
> > struct ib_umad_packet *packet;
> > ssize_t ret;
> >
> > - if (count < sizeof (struct ib_user_mad))
> > + if (count < hdr_size(file))
> > return -EINVAL;
> >
> > spin_lock_irq(&file->recv_lock);
> > @@ -348,9 +361,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
> > spin_unlock_irq(&file->recv_lock);
> >
> > if (packet->recv_wc)
> > - ret = copy_recv_mad(buf, packet, count);
> > + ret = copy_recv_mad(file, buf, packet, count);
> > else
> > - ret = copy_send_mad(buf, packet, count);
> > + ret = copy_send_mad(file, buf, packet, count);
> >
> > if (ret < 0) {
> > /* Requeue packet */
> > @@ -442,15 +455,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
> > __be64 *tid;
> > int ret, data_len, hdr_len, copy_offset, rmpp_active;
> >
> > - if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
> > + if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
> > return -EINVAL;
> >
> > packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
> > if (!packet)
> > return -ENOMEM;
> >
> > - if (copy_from_user(&packet->mad, buf,
> > - sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
> > + if (copy_from_user(&packet->mad, buf, hdr_size(file))) {
> > ret = -EFAULT;
> > goto err;
> > }
> > @@ -461,6 +473,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
> > goto err;
> > }
> >
> > + buf += hdr_size(file);
> > +
> > + if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) {
> > + ret = -EFAULT;
> > + goto err;
> > + }
> > +
> > down_read(&file->port->mutex);
> >
> > agent = __get_agent(file, packet->mad.hdr.id);
> > @@ -500,11 +519,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
> > IB_MGMT_RMPP_FLAG_ACTIVE;
> > }
> >
> > - data_len = count - sizeof (struct ib_user_mad) - hdr_len;
> > + data_len = count - hdr_size(file) - hdr_len;
> > packet->msg = ib_create_send_mad(agent,
> > be32_to_cpu(packet->mad.hdr.qpn),
> > - 0, rmpp_active, hdr_len,
> > - data_len, GFP_KERNEL);
> > + packet->mad.hdr.pkey_index, rmpp_active,
> > + hdr_len, data_len, GFP_KERNEL);
> > if (IS_ERR(packet->msg)) {
> > ret = PTR_ERR(packet->msg);
> > goto err_ah;
> > @@ -517,7 +536,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
> >
> > /* Copy MAD header. Any RMPP header is already in place. */
> > memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
> > - buf += sizeof (struct ib_user_mad);
> >
> > if (!rmpp_active) {
> > if (copy_from_user(packet->msg->mad + copy_offset,
> > @@ -646,6 +664,7 @@ found:
> > goto out;
> > }
> >
> > + file->already_used = 1;
> > file->agent[agent_id] = agent;
> > ret = 0;
> >
> > @@ -682,6 +701,20 @@ out:
> > return ret;
> > }
> >
> > +static long ib_umad_enable_pkey(struct ib_umad_file *file)
> > +{
> > + int ret = 0;
> > +
> > + down_write(&file->port->mutex);
> > + if (file->already_used)
> > + ret = -EINVAL;
> > + else
> > + file->use_pkey_index = 1;
> > + up_write(&file->port->mutex);
> > +
> > + return ret;
> > +}
> > +
> > static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
> > unsigned long arg)
> > {
> > @@ -690,6 +723,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
> > return ib_umad_reg_agent(filp->private_data, arg);
> > case IB_USER_MAD_UNREGISTER_AGENT:
> > return ib_umad_unreg_agent(filp->private_data, arg);
> > + case IB_USER_MAD_ENABLE_PKEY:
> > + return ib_umad_enable_pkey(filp->private_data);
> > default:
> > return -ENOIOCTLCMD;
> > }
> > diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h
> > index d66b15e..2a32043 100644
> > --- a/include/rdma/ib_user_mad.h
> > +++ b/include/rdma/ib_user_mad.h
> > @@ -52,7 +52,50 @@
> > */
> >
> > /**
> > + * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index
> > + * @id - ID of agent MAD received with/to be sent with
> > + * @status - 0 on successful receive, ETIMEDOUT if no response
> > + * received (transaction ID in data[] will be set to TID of original
> > + * request) (ignored on send)
> > + * @timeout_ms - Milliseconds to wait for response (unset on receive)
> > + * @retries - Number of automatic retries to attempt
> > + * @qpn - Remote QP number received from/to be sent to
> > + * @qkey - Remote Q_Key to be sent with (unset on receive)
> > + * @lid - Remote lid received from/to be sent to
> > + * @sl - Service level received with/to be sent with
> > + * @path_bits - Local path bits received with/to be sent with
> > + * @grh_present - If set, GRH was received/should be sent
> > + * @gid_index - Local GID index to send with (unset on receive)
> > + * @hop_limit - Hop limit in GRH
> > + * @traffic_class - Traffic class in GRH
> > + * @gid - Remote GID in GRH
> > + * @flow_label - Flow label in GRH
> > + */
> > +struct ib_user_mad_hdr_old {
> > + __u32 id;
> > + __u32 status;
> > + __u32 timeout_ms;
> > + __u32 retries;
> > + __u32 length;
> > + __be32 qpn;
> > + __be32 qkey;
> > + __be16 lid;
> > + __u8 sl;
> > + __u8 path_bits;
> > + __u8 grh_present;
> > + __u8 gid_index;
> > + __u8 hop_limit;
> > + __u8 traffic_class;
> > + __u8 gid[16];
> > + __be32 flow_label;
> > +};
> > +
> > +/**
> > * ib_user_mad_hdr - MAD packet header
> > + * This layout allows specifying/receiving the P_Key index. To use
> > + * this capability, an application must call the
> > + * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before
> > + * any other actions with the file handle.
> > * @id - ID of agent MAD received with/to be sent with
> > * @status - 0 on successful receive, ETIMEDOUT if no response
> > * received (transaction ID in data[] will be set to TID of original
> > @@ -70,6 +113,7 @@
> > * @traffic_class - Traffic class in GRH
> > * @gid - Remote GID in GRH
> > * @flow_label - Flow label in GRH
> > + * @pkey_index - P_Key index
> > */
> > struct ib_user_mad_hdr {
> > __u32 id;
> > @@ -88,6 +132,8 @@ struct ib_user_mad_hdr {
> > __u8 traffic_class;
> > __u8 gid[16];
> > __be32 flow_label;
> > + __u16 pkey_index;
> > + __u8 reserved[6];
> > };
> >
> > /**
> > @@ -134,4 +180,6 @@ struct ib_user_mad_reg_req {
> >
> > #define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32)
> >
> > +#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3)
> > +
> > #endif /* IB_USER_MAD_H */
> > _______________________________________________
> > general mailing list
> > general at lists.openfabrics.org
> > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
> >
> > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
> >
>
More information about the general
mailing list