[ofa-general] [PATCH][RFC] P_Key support for umad

Hal Rosenstock hal.rosenstock at gmail.com
Mon Sep 10 19:03:50 PDT 2007


On 9/7/07, Roland Dreier <rdreier at cisco.com> wrote:
> Here is a long overdue patch to enable userspace to control the P_Key
> index used for userspace MADs.  I used the approach we discussed when
> this first came up, namely adding an ioctl to enable the new
> interface so that existing binaries don't break.
>
> I haven't had a chance to make all the userspace library changes to
> test the new interface and I likely won't until I return home (I
> should be done traveling for a few months after this week).  I have
> tested existing code against a kernel with this patch applied and it
> seems to be OK, and I wanted to at least get this out for review as
> soon as I had it.
>
> Please review/test.  I would like to get this into 2.6.24 if possible
> since we've known so long that we needed it.

Thanks for doing this :-) One nit below in the doc.

I spent some time testing it today in old mode and although my
environment is limited, I did have trouble with an RMPP test as
follows:

Can someone try the following with OpenSM running:

First, osmtest -f c
and then
osmtest -f a

All on the same node with the new user_mad module.

That seems to hang rather than complete for me. I didn't have time
to track this down any further.

-- Hal

> Thanks,
>  Roland
>
>
> diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
> index 8ec54b9..a3450aa 100644
> --- a/Documentation/infiniband/user_mad.txt
> +++ b/Documentation/infiniband/user_mad.txt
> @@ -99,6 +99,20 @@ Transaction IDs
>   request/response pairs.  The upper 32 bits are reserved for use by
>   the kernel and will be overwritten before a MAD is sent.
>
> +P_Key Index Handling
> +
> +  The old ib_umad interface did not allow setting the P_Key index for
> +  MADs that are sent and did not provide a way for obtaining the P_Key
> +  index of received MADs.  A new layout for struct ib_user_mad_hdr
> +  with a pkey_index member has been defined; however, to preserve
> +  binary compatibility with older applications, this new layout will
> +  not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
> +  before a file description is used for anything else.

Nit: Should this be "file descriptor" ?

> +
> +  In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
> +  to 6, the new layout of struct ib_user_mad_hdr will be used by
> +  default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
> +
>  Setting IsSM Capability Bit
>
>   To set the IsSM capability bit for a port, simply open the
> diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
> index d97ded2..3a0e579 100644
> --- a/drivers/infiniband/core/user_mad.c
> +++ b/drivers/infiniband/core/user_mad.c
> @@ -118,6 +118,8 @@ struct ib_umad_file {
>        wait_queue_head_t       recv_wait;
>        struct ib_mad_agent    *agent[IB_UMAD_MAX_AGENTS];
>        int                     agents_dead;
> +       u8                      use_pkey_index;
> +       u8                      already_used;
>  };
>
>  struct ib_umad_packet {
> @@ -147,6 +149,12 @@ static void ib_umad_release_dev(struct kref *ref)
>        kfree(dev);
>  }
>
> +static int hdr_size(struct ib_umad_file *file)
> +{
> +       return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
> +               sizeof (struct ib_user_mad_hdr_old);
> +}
> +
>  /* caller must hold port->mutex at least for reading */
>  static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
>  {
> @@ -221,13 +229,13 @@ static void recv_handler(struct ib_mad_agent *agent,
>        packet->length = mad_recv_wc->mad_len;
>        packet->recv_wc = mad_recv_wc;
>
> -       packet->mad.hdr.status    = 0;
> -       packet->mad.hdr.length    = sizeof (struct ib_user_mad) +
> -                                   mad_recv_wc->mad_len;
> -       packet->mad.hdr.qpn       = cpu_to_be32(mad_recv_wc->wc->src_qp);
> -       packet->mad.hdr.lid       = cpu_to_be16(mad_recv_wc->wc->slid);
> -       packet->mad.hdr.sl        = mad_recv_wc->wc->sl;
> -       packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
> +       packet->mad.hdr.status     = 0;
> +       packet->mad.hdr.length     = hdr_size(file) + mad_recv_wc->mad_len;
> +       packet->mad.hdr.qpn        = cpu_to_be32(mad_recv_wc->wc->src_qp);
> +       packet->mad.hdr.lid        = cpu_to_be16(mad_recv_wc->wc->slid);
> +       packet->mad.hdr.sl         = mad_recv_wc->wc->sl;
> +       packet->mad.hdr.path_bits  = mad_recv_wc->wc->dlid_path_bits;
> +       packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
>        packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
>        if (packet->mad.hdr.grh_present) {
>                struct ib_ah_attr ah_attr;
> @@ -253,8 +261,8 @@ err1:
>        ib_free_recv_mad(mad_recv_wc);
>  }
>
> -static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
> -                            size_t count)
> +static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf,
> +                            struct ib_umad_packet *packet, size_t count)
>  {
>        struct ib_mad_recv_buf *recv_buf;
>        int left, seg_payload, offset, max_seg_payload;
> @@ -262,15 +270,15 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
>        /* We need enough room to copy the first (or only) MAD segment. */
>        recv_buf = &packet->recv_wc->recv_buf;
>        if ((packet->length <= sizeof (*recv_buf->mad) &&
> -            count < sizeof (packet->mad) + packet->length) ||
> +            count < hdr_size(file) + packet->length) ||
>            (packet->length > sizeof (*recv_buf->mad) &&
> -            count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
> +            count < hdr_size(file) + sizeof (*recv_buf->mad)))
>                return -EINVAL;
>
> -       if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
> +       if (copy_to_user(buf, &packet->mad, hdr_size(file)))
>                return -EFAULT;
>
> -       buf += sizeof (packet->mad);
> +       buf += hdr_size(file);
>        seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
>        if (copy_to_user(buf, recv_buf->mad, seg_payload))
>                return -EFAULT;
> @@ -280,7 +288,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
>                 * Multipacket RMPP MAD message. Copy remainder of message.
>                 * Note that last segment may have a shorter payload.
>                 */
> -               if (count < sizeof (packet->mad) + packet->length) {
> +               if (count < hdr_size(file) + packet->length) {
>                        /*
>                         * The buffer is too small, return the first RMPP segment,
>                         * which includes the RMPP message length.
> @@ -300,18 +308,23 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
>                                return -EFAULT;
>                }
>        }
> -       return sizeof (packet->mad) + packet->length;
> +       return hdr_size(file) + packet->length;
>  }
>
> -static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
> -                            size_t count)
> +static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf,
> +                            struct ib_umad_packet *packet, size_t count)
>  {
> -       ssize_t size = sizeof (packet->mad) + packet->length;
> +       ssize_t size = hdr_size(file) + packet->length;
>
>        if (count < size)
>                return -EINVAL;
>
> -       if (copy_to_user(buf, &packet->mad, size))
> +       if (copy_to_user(buf, &packet->mad, hdr_size(file)))
> +               return -EFAULT;
> +
> +       buf += hdr_size(file);
> +
> +       if (copy_to_user(buf, packet->mad.data, packet->length))
>                return -EFAULT;
>
>        return size;
> @@ -324,7 +337,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
>        struct ib_umad_packet *packet;
>        ssize_t ret;
>
> -       if (count < sizeof (struct ib_user_mad))
> +       if (count < hdr_size(file))
>                return -EINVAL;
>
>        spin_lock_irq(&file->recv_lock);
> @@ -348,9 +361,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
>        spin_unlock_irq(&file->recv_lock);
>
>        if (packet->recv_wc)
> -               ret = copy_recv_mad(buf, packet, count);
> +               ret = copy_recv_mad(file, buf, packet, count);
>        else
> -               ret = copy_send_mad(buf, packet, count);
> +               ret = copy_send_mad(file, buf, packet, count);
>
>        if (ret < 0) {
>                /* Requeue packet */
> @@ -442,15 +455,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
>        __be64 *tid;
>        int ret, data_len, hdr_len, copy_offset, rmpp_active;
>
> -       if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
> +       if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
>                return -EINVAL;
>
>        packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
>        if (!packet)
>                return -ENOMEM;
>
> -       if (copy_from_user(&packet->mad, buf,
> -                           sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
> +       if (copy_from_user(&packet->mad, buf, hdr_size(file))) {
>                ret = -EFAULT;
>                goto err;
>        }
> @@ -461,6 +473,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
>                goto err;
>        }
>
> +       buf += hdr_size(file);
> +
> +       if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) {
> +               ret = -EFAULT;
> +               goto err;
> +       }
> +
>        down_read(&file->port->mutex);
>
>        agent = __get_agent(file, packet->mad.hdr.id);
> @@ -500,11 +519,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
>                              IB_MGMT_RMPP_FLAG_ACTIVE;
>        }
>
> -       data_len = count - sizeof (struct ib_user_mad) - hdr_len;
> +       data_len = count - hdr_size(file) - hdr_len;
>        packet->msg = ib_create_send_mad(agent,
>                                         be32_to_cpu(packet->mad.hdr.qpn),
> -                                        0, rmpp_active, hdr_len,
> -                                        data_len, GFP_KERNEL);
> +                                        packet->mad.hdr.pkey_index, rmpp_active,
> +                                        hdr_len, data_len, GFP_KERNEL);
>        if (IS_ERR(packet->msg)) {
>                ret = PTR_ERR(packet->msg);
>                goto err_ah;
> @@ -517,7 +536,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
>
>        /* Copy MAD header.  Any RMPP header is already in place. */
>        memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
> -       buf += sizeof (struct ib_user_mad);
>
>        if (!rmpp_active) {
>                if (copy_from_user(packet->msg->mad + copy_offset,
> @@ -646,6 +664,7 @@ found:
>                goto out;
>        }
>
> +       file->already_used    = 1;
>        file->agent[agent_id] = agent;
>        ret = 0;
>
> @@ -682,6 +701,20 @@ out:
>        return ret;
>  }
>
> +static long ib_umad_enable_pkey(struct ib_umad_file *file)
> +{
> +       int ret = 0;
> +
> +       down_write(&file->port->mutex);
> +       if (file->already_used)
> +               ret = -EINVAL;
> +       else
> +               file->use_pkey_index = 1;
> +       up_write(&file->port->mutex);
> +
> +       return ret;
> +}
> +
>  static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
>                          unsigned long arg)
>  {
> @@ -690,6 +723,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
>                return ib_umad_reg_agent(filp->private_data, arg);
>        case IB_USER_MAD_UNREGISTER_AGENT:
>                return ib_umad_unreg_agent(filp->private_data, arg);
> +       case IB_USER_MAD_ENABLE_PKEY:
> +               return ib_umad_enable_pkey(filp->private_data);
>        default:
>                return -ENOIOCTLCMD;
>        }
> diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h
> index d66b15e..2a32043 100644
> --- a/include/rdma/ib_user_mad.h
> +++ b/include/rdma/ib_user_mad.h
> @@ -52,7 +52,50 @@
>  */
>
>  /**
> + * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index
> + * @id - ID of agent MAD received with/to be sent with
> + * @status - 0 on successful receive, ETIMEDOUT if no response
> + *   received (transaction ID in data[] will be set to TID of original
> + *   request) (ignored on send)
> + * @timeout_ms - Milliseconds to wait for response (unset on receive)
> + * @retries - Number of automatic retries to attempt
> + * @qpn - Remote QP number received from/to be sent to
> + * @qkey - Remote Q_Key to be sent with (unset on receive)
> + * @lid - Remote lid received from/to be sent to
> + * @sl - Service level received with/to be sent with
> + * @path_bits - Local path bits received with/to be sent with
> + * @grh_present - If set, GRH was received/should be sent
> + * @gid_index - Local GID index to send with (unset on receive)
> + * @hop_limit - Hop limit in GRH
> + * @traffic_class - Traffic class in GRH
> + * @gid - Remote GID in GRH
> + * @flow_label - Flow label in GRH
> + */
> +struct ib_user_mad_hdr_old {
> +       __u32   id;
> +       __u32   status;
> +       __u32   timeout_ms;
> +       __u32   retries;
> +       __u32   length;
> +       __be32  qpn;
> +       __be32  qkey;
> +       __be16  lid;
> +       __u8    sl;
> +       __u8    path_bits;
> +       __u8    grh_present;
> +       __u8    gid_index;
> +       __u8    hop_limit;
> +       __u8    traffic_class;
> +       __u8    gid[16];
> +       __be32  flow_label;
> +};
> +
> +/**
>  * ib_user_mad_hdr - MAD packet header
> + *   This layout allows specifying/receiving the P_Key index.  To use
> + *   this capability, an application must call the
> + *   IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before
> + *   any other actions with the file handle.
>  * @id - ID of agent MAD received with/to be sent with
>  * @status - 0 on successful receive, ETIMEDOUT if no response
>  *   received (transaction ID in data[] will be set to TID of original
> @@ -70,6 +113,7 @@
>  * @traffic_class - Traffic class in GRH
>  * @gid - Remote GID in GRH
>  * @flow_label - Flow label in GRH
> + * @pkey_index - P_Key index
>  */
>  struct ib_user_mad_hdr {
>        __u32   id;
> @@ -88,6 +132,8 @@ struct ib_user_mad_hdr {
>        __u8    traffic_class;
>        __u8    gid[16];
>        __be32  flow_label;
> +       __u16   pkey_index;
> +       __u8    reserved[6];
>  };
>
>  /**
> @@ -134,4 +180,6 @@ struct ib_user_mad_reg_req {
>
>  #define IB_USER_MAD_UNREGISTER_AGENT   _IOW(IB_IOCTL_MAGIC, 2, __u32)
>
> +#define IB_USER_MAD_ENABLE_PKEY                _IO(IB_IOCTL_MAGIC, 3)
> +
>  #endif /* IB_USER_MAD_H */
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
>
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
>



More information about the general mailing list