[ofa-general] Re: [PATCH 1/2] libibumad: fix partition support
Hal Rosenstock
halr at voltaire.com
Wed Jun 20 14:11:21 PDT 2007
On Wed, 2007-06-20 at 17:01, Hal Rosenstock wrote:
> On Fri, 2007-06-15 at 12:59, Sean Hefty wrote:
> > Allow sending MADs on different partitions. This requires kernel support,
> > so requires an ABI bump. This patch maintains support for the previous
> > ABI.
> >
> > Clarify that umad_set_pkey() takes a pkey index, and not the pkey itself.
> > (Unfortunately, the call is used both ways in the management tree.)
>
> This works well in all the combinatorials I tested (user_mad ABIs,
> libibumad and libvendor versions).
>
> Just two things:
> 1. It might be better if the ABI version 5 warning message for only
> pkey_index 0 being supported comes out at umad_init time rather than
> umad_set_pkey time so that the user is not swamped with these.
>
> 2. There is one pathological combination. It would be using 2.6.23 (with
> the new user_mad ABI version 6), an updated libibumad would be required,
> but an older libvendor (osm_vendor_ibumad.c without your one line
> change). That might be the case with someone who swapped back and forth
> between OFED 1.2 and master in some scenarios.
This begs the question as to whether your one line change to
osm_vendor_ibumad.c should be made to the OFED 1.2 version as well.
-- Hal
> Also, this does not quite work as expected. An error was returned based
> on the bad pkey index but I do see a send on the IB link (with a bad
> pkey). I wouldn't have expected the latter part. Maybe this is a driver
> or firmware issue. Not sure yet. I suppose there should be some
> pkey_index validation (to make sure it is within the device's valid
> range) and that should also ultimately get added to libibumad or should
> such validation go into the user_mad kernel module ?
>
> -- Hal
>
> > Signed-off-by: Sean Hefty <sean.hefty at intel.com>
> > ---
> > Additional changes are needed to retrieve the PKey and GID tables, so that
> > the PKeys and GIDs can be converted to the correct index. These will come
> > in future patches.
> >
> >
> > doc/libibumad.txt | 2
> > libibumad/include/infiniband/umad.h | 7 +
> > libibumad/src/umad.c | 192 +++++++++++++++++++++++++++--------
> > 3 files changed, 156 insertions(+), 45 deletions(-)
> >
> > diff --git a/doc/libibumad.txt b/doc/libibumad.txt
> > index 7b2b4f4..4e37e60 100644
> > --- a/doc/libibumad.txt
> > +++ b/doc/libibumad.txt
> > @@ -336,7 +336,7 @@ the given host ordered fields. Return 0 on success, -1 on errors.
> > umad_set_pkey:
> >
> > Synopsis:
> > - int umad_set_pkey(void *umad, int pkey);
> > + int umad_set_pkey(void *umad, int pkey_index);
> >
> > Description: Set the pkey within the 'umad' buffer. Return 0 on success,
> > -1 on errors.
> > diff --git a/libibumad/include/infiniband/umad.h b/libibumad/include/infiniband/umad.h
> > old mode 100644
> > new mode 100755
> > index 9020649..9369d95
> > --- a/libibumad/include/infiniband/umad.h
> > +++ b/libibumad/include/infiniband/umad.h
> > @@ -60,6 +60,8 @@ typedef struct ib_mad_addr {
> > uint8_t traffic_class;
> > uint8_t gid[16];
> > uint32_t flow_label;
> > + uint16_t pkey_index;
> > + uint8_t reserved[6];
> > } ib_mad_addr_t;
> >
> > typedef struct ib_user_mad {
> > @@ -72,7 +74,8 @@ typedef struct ib_user_mad {
> > uint8_t data[0];
> > } ib_user_mad_t;
> >
> > -#define IB_UMAD_ABI_VERSION 5
> > +#define IB_UMAD_MIN_ABI_VERSION 5
> > +#define IB_UMAD_MAX_ABI_VERSION 6
> > #define IB_UMAD_ABI_DIR "/sys/class/infiniband_mad"
> > #define IB_UMAD_ABI_FILE "abi_version"
> >
> > @@ -167,7 +170,7 @@ int umad_set_grh_net(void *umad, void *mad_addr);
> > int umad_set_grh(void *umad, void *mad_addr);
> > int umad_set_addr_net(void *umad, int dlid, int dqp, int sl, int qkey);
> > int umad_set_addr(void *umad, int dlid, int dqp, int sl, int qkey);
> > -int umad_set_pkey(void *umad, int pkey);
> > +int umad_set_pkey(void *umad, int pkey_index);
> >
> > int umad_send(int portid, int agentid, void *umad, int length,
> > int timeout_ms, int retries);
> > diff --git a/libibumad/src/umad.c b/libibumad/src/umad.c
> > old mode 100644
> > new mode 100755
> > index 5f9b36b..c750fe0
> > --- a/libibumad/src/umad.c
> > +++ b/libibumad/src/umad.c
> > @@ -69,6 +69,7 @@ int umaddebug = 0;
> > #define UMAD_DEV_NAME_SZ 32
> > #define UMAD_DEV_FILE_SZ 256
> >
> > +static uint abi_version;
> > static char *def_ca_name = "mthca0";
> > static int def_ca_port = 1;
> >
> > @@ -82,6 +83,31 @@ typedef struct Port {
> >
> > static Port ports[UMAD_MAX_PORTS];
> >
> > +typedef struct ib_mad_addr_abi_5 {
> > + uint32_t qpn;
> > + uint32_t qkey;
> > + uint16_t lid;
> > + uint8_t sl;
> > + uint8_t path_bits;
> > + uint8_t grh_present;
> > + uint8_t gid_index;
> > + uint8_t hop_limit;
> > + uint8_t traffic_class;
> > + uint8_t gid[16];
> > + uint32_t flow_label;
> > +} ib_mad_addr_abi_5_t;
> > +
> > +typedef struct ib_user_mad_abi_5 {
> > + uint32_t agent_id;
> > + uint32_t status;
> > + uint32_t timeout_ms;
> > + uint32_t retries;
> > + uint32_t length;
> > + ib_mad_addr_abi_5_t addr;
> > + uint8_t data[0];
> > +} ib_user_mad_abi_5_t;
> > +
> > +
> > /*************************************
> > * Port
> > */
> > @@ -463,6 +489,101 @@ dev_to_umad_id(char *dev, uint port)
> > return -1; /* not found */
> > }
> >
> > +static int
> > +write_data(int fd, void *data, int size)
> > +{
> > + int n;
> > +
> > + n = write(fd, data, size);
> > + if (n != size) {
> > + DEBUG("write returned %d != sizeof mad data %d (%m)", n, size);
> > + if (!errno)
> > + errno = EIO;
> > + return -EIO;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int
> > +write_abi_5(int fd, struct ib_user_mad *mad, int length)
> > +{
> > + struct ib_user_mad_abi_5 *umad_5;
> > + int n;
> > +
> > + n = sizeof *umad_5 + length;
> > + umad_5 = malloc(n);
> > + if (!umad_5) {
> > + errno = ENOMEM;
> > + return -ENOMEM;
> > + }
> > +
> > + memcpy(umad_5, mad, sizeof *umad_5);
> > + memcpy(umad_5->data, mad->data, length);
> > +
> > + n = write_data(fd, umad_5, n);
> > + free(umad_5);
> > + return n;
> > +}
> > +
> > +static int
> > +read_data(int fd, void *data, int size, int *length)
> > +{
> > + struct ib_user_mad *mad = data;
> > + int n, umad_size;
> > +
> > + umad_size = size - *length;
> > +
> > + n = read(fd, data, size);
> > + if ((n >= 0) && (n <= size)) {
> > + DEBUG("mad received by agent %d length %d", mad->agent_id, n);
> > + if (n > umad_size)
> > + *length = n - umad_size;
> > + else
> > + *length = 0;
> > + return mad->agent_id;
> > + }
> > +
> > + if (n == -EWOULDBLOCK) {
> > + if (!errno)
> > + errno = EWOULDBLOCK;
> > + return n;
> > + }
> > +
> > + DEBUG("read returned %zu > sizeof mad %zu (%m)",
> > + mad->length - umad_size, *length);
> > +
> > + *length = mad->length - umad_size;
> > + if (!errno)
> > + errno = EIO;
> > + return -errno;
> > +}
> > +
> > +static int
> > +read_abi_5(int fd, void *umad, int *length)
> > +{
> > + struct ib_user_mad *mad = umad;
> > + struct ib_user_mad_abi_5 *umad_5;
> > + int n;
> > +
> > + n = sizeof *umad_5 + *length;
> > + umad_5 = malloc(n);
> > + if (!umad_5) {
> > + errno = EINVAL;
> > + return -EINVAL;
> > + }
> > +
> > + n = read_data(fd, umad_5, n, length);
> > + if (n >= 0) {
> > + memcpy(mad, umad_5, sizeof *umad_5);
> > + mad->addr.pkey_index = 0;
> > + memcpy(mad->data, umad_5->data, *length);
> > + }
> > +
> > + free(umad_5);
> > + return n;
> > +}
> > +
> > /*******************************
> > * Public interface
> > */
> > @@ -470,17 +591,19 @@ dev_to_umad_id(char *dev, uint port)
> > int
> > umad_init(void)
> > {
> > - uint abi_version;
> > -
> > TRACE("umad_init");
> > if (sys_read_uint(IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE, &abi_version) < 0) {
> > IBWARN("can't read ABI version from %s/%s (%m): is ib_umad module loaded?",
> > IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE);
> > return -1;
> > }
> > - if (abi_version != IB_UMAD_ABI_VERSION) {
> > - IBWARN("wrong ABI version: %s/%s is %d but library ABI is %d",
> > - IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE, abi_version, IB_UMAD_ABI_VERSION);
> > +
> > + if (abi_version < IB_UMAD_MIN_ABI_VERSION ||
> > + abi_version > IB_UMAD_MAX_ABI_VERSION) {
> > + IBWARN("wrong ABI version: %s/%s is %d but library ABI "
> > + "supports %d through %d",
> > + IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE, abi_version,
> > + IB_UMAD_MIN_ABI_VERSION, IB_UMAD_MAX_ABI_VERSION);
> > return -1;
> > }
> > return 0;
> > @@ -699,11 +822,16 @@ umad_set_grh(void *umad, void *mad_addr)
> > }
> >
> > int
> > -umad_set_pkey(void *umad, int pkey)
> > +umad_set_pkey(void *umad, int pkey_index)
> > {
> > -#if 0
> > - mad->addr.pkey = 0; /* FIXME - PKEY support */
> > -#endif
> > + struct ib_user_mad *mad = umad;
> > +
> > + if (abi_version == 5 && pkey_index != 0) {
> > + IBWARN("umad_set_pkey: ABI 5 only supports pkey_index 0\n");
> > + return -EINVAL;
> > + }
> > +
> > + mad->addr.pkey_index = pkey_index;
> > return 0;
> > }
> >
> > @@ -761,15 +889,12 @@ umad_send(int portid, int agentid, void *umad, int length,
> > if (umaddebug > 1)
> > umad_dump(mad);
> >
> > - n = write(port->dev_fd, mad, length + sizeof *mad);
> > - if (n == length + sizeof *mad)
> > - return 0;
> > + if (abi_version == 5)
> > + n = write_abi_5(port->dev_fd, mad, length);
> > + else
> > + n = write_data(port->dev_fd, mad, sizeof *mad + length);
> >
> > - DEBUG("write returned %d != sizeof umad %zu + length %d (%m)",
> > - n, sizeof *mad, length);
> > - if (!errno)
> > - errno = EIO;
> > - return -EIO;
> > + return n;
> > }
> >
> > static int
> > @@ -793,7 +918,6 @@ dev_poll(int fd, int timeout_ms)
> > int
> > umad_recv(int portid, void *umad, int *length, int timeout_ms)
> > {
> > - struct ib_user_mad *mad = umad;
> > Port *port;
> > int n;
> >
> > @@ -817,29 +941,13 @@ umad_recv(int portid, void *umad, int *length, int timeout_ms)
> > return n;
> > }
> >
> > - n = read(port->dev_fd, umad, sizeof *mad + *length);
> > - if ((n >= 0) && (n <= sizeof *mad + *length)) {
> > - DEBUG("mad received by agent %d length %d", mad->agent_id, n);
> > - if (n > sizeof *mad)
> > - *length = n - sizeof *mad;
> > - else
> > - *length = 0;
> > - return mad->agent_id;
> > - }
> > -
> > - if (n == -EWOULDBLOCK) {
> > - if (!errno)
> > - errno = EWOULDBLOCK;
> > - return n;
> > - }
> > -
> > - DEBUG("read returned %zu > sizeof umad %zu + length %d (%m)",
> > - mad->length - sizeof *mad, sizeof *mad, *length);
> > + if (abi_version == 5)
> > + n = read_abi_5(port->dev_fd, umad, length);
> > + else
> > + n = read_data(port->dev_fd, umad,
> > + sizeof(struct ib_user_mad) + *length, length);
> >
> > - *length = mad->length - sizeof *mad;
> > - if (!errno)
> > - errno = EIO;
> > - return -errno;
> > + return n;
> > }
> >
> > int
> > @@ -996,10 +1104,10 @@ umad_addr_dump(ib_mad_addr_t *addr)
> > gid_str[i*2] = 0;
> > IBWARN("qpn %d qkey 0x%x lid 0x%x sl %d\n"
> > "grh_present %d gid_index %d hop_limit %d traffic_class %d flow_label 0x%x\n"
> > - "Gid 0x%s",
> > + "Gid 0x%s pkey_index %d",
> > ntohl(addr->qpn), ntohl(addr->qkey), ntohs(addr->lid), addr->sl,
> > addr->grh_present, (int)addr->gid_index, (int)addr->hop_limit,
> > - (int)addr->traffic_class, addr->flow_label, gid_str);
> > + (int)addr->traffic_class, addr->flow_label, gid_str, addr->pkey_index);
> > }
> >
> > void
> >
>
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
>
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
More information about the general
mailing list