[ofa-general] Re: [PATCH 1/2] libibumad: fix partition support

Hal Rosenstock halr at voltaire.com
Wed Jun 20 14:01:21 PDT 2007


On Fri, 2007-06-15 at 12:59, Sean Hefty wrote:
> Allow sending MADs on different partitions.  This requires kernel support,
> so requires an ABI bump.  This patch maintains support for the previous
> ABI.
> 
> Clarify that umad_set_pkey() takes a pkey index, and not the pkey itself.
> (Unfortunately, the call is used both ways in the management tree.)

This works well in all the combinatorials I tested (user_mad ABIs,
libibumad and libvendor versions).

Just two things:
1. It might be better if the ABI version 5 warning message for only
pkey_index 0 being supported comes out at umad_init time rather than
umad_set_pkey time so that the user is not swamped with these.

2. There is one pathological combination. It would be using 2.6.23 (with
the new user_mad ABI version 6), an updated libibumad would be required,
but an older libvendor (osm_vendor_ibumad.c without your one line
change). That might be the case with someone who swapped back and forth
between OFED 1.2 and master in some scenarios.

Also, this does not quite work as expected. An error was returned based
on the bad pkey index but I do see a send on the IB link (with a bad
pkey). I wouldn't have expected the latter part. Maybe this is a driver
or firmware issue. Not sure yet. I suppose there should be some
pkey_index validation (to make sure it is within the device's valid
range) and that should also ultimately get added to libibumad or should
such validation go into the user_mad kernel module ?

-- Hal

> Signed-off-by: Sean Hefty <sean.hefty at intel.com>
> ---
> Additional changes are needed to retrieve the PKey and GID tables, so that
> the PKeys and GIDs can be converted to the correct index.  These will come
> in future patches.
> 
> 
>  doc/libibumad.txt                   |    2 
>  libibumad/include/infiniband/umad.h |    7 +
>  libibumad/src/umad.c                |  192 +++++++++++++++++++++++++++--------
>  3 files changed, 156 insertions(+), 45 deletions(-)
> 
> diff --git a/doc/libibumad.txt b/doc/libibumad.txt
> index 7b2b4f4..4e37e60 100644
> --- a/doc/libibumad.txt
> +++ b/doc/libibumad.txt
> @@ -336,7 +336,7 @@ the given host ordered fields. Return 0 on success, -1 on errors.
>  umad_set_pkey:
>  
>  Synopsis:
> -	int	umad_set_pkey(void *umad, int pkey);
> +	int	umad_set_pkey(void *umad, int pkey_index);
>  
>  Description: Set the pkey within the 'umad' buffer.  Return 0 on success,
>  -1 on errors.
> diff --git a/libibumad/include/infiniband/umad.h b/libibumad/include/infiniband/umad.h
> old mode 100644
> new mode 100755
> index 9020649..9369d95
> --- a/libibumad/include/infiniband/umad.h
> +++ b/libibumad/include/infiniband/umad.h
> @@ -60,6 +60,8 @@ typedef struct ib_mad_addr {
>  	uint8_t	 traffic_class;
>  	uint8_t	 gid[16];
>  	uint32_t flow_label;
> +	uint16_t pkey_index;
> +	uint8_t  reserved[6];
>  } ib_mad_addr_t;
>  
>  typedef struct ib_user_mad {
> @@ -72,7 +74,8 @@ typedef struct ib_user_mad {
>  	uint8_t  data[0];
>  } ib_user_mad_t;
>  
> -#define IB_UMAD_ABI_VERSION	5
> +#define IB_UMAD_MIN_ABI_VERSION	5
> +#define IB_UMAD_MAX_ABI_VERSION	6
>  #define IB_UMAD_ABI_DIR		"/sys/class/infiniband_mad"
>  #define IB_UMAD_ABI_FILE	"abi_version"
>  
> @@ -167,7 +170,7 @@ int	umad_set_grh_net(void *umad, void *mad_addr);
>  int	umad_set_grh(void *umad, void *mad_addr);
>  int	umad_set_addr_net(void *umad, int dlid, int dqp, int sl, int qkey);
>  int	umad_set_addr(void *umad, int dlid, int dqp, int sl, int qkey);
> -int	umad_set_pkey(void *umad, int pkey);
> +int	umad_set_pkey(void *umad, int pkey_index);
>  
>  int	umad_send(int portid, int agentid, void *umad, int length,
>  		  int timeout_ms, int retries);
> diff --git a/libibumad/src/umad.c b/libibumad/src/umad.c
> old mode 100644
> new mode 100755
> index 5f9b36b..c750fe0
> --- a/libibumad/src/umad.c
> +++ b/libibumad/src/umad.c
> @@ -69,6 +69,7 @@ int umaddebug = 0;
>  #define UMAD_DEV_NAME_SZ	32
>  #define UMAD_DEV_FILE_SZ	256
>  
> +static uint abi_version;
>  static char *def_ca_name = "mthca0";
>  static int def_ca_port = 1;
>  
> @@ -82,6 +83,31 @@ typedef struct Port {
>  
>  static Port ports[UMAD_MAX_PORTS];
>  
> +typedef struct ib_mad_addr_abi_5 {
> +	uint32_t qpn;
> +	uint32_t qkey;
> +	uint16_t lid;
> +	uint8_t	 sl;
> +	uint8_t	 path_bits;
> +	uint8_t	 grh_present;
> +	uint8_t	 gid_index;
> +	uint8_t	 hop_limit;
> +	uint8_t	 traffic_class;
> +	uint8_t	 gid[16];
> +	uint32_t flow_label;
> +} ib_mad_addr_abi_5_t;
> +
> +typedef struct ib_user_mad_abi_5 {
> +	uint32_t agent_id;
> +	uint32_t status;
> +	uint32_t timeout_ms;
> +	uint32_t retries;
> +	uint32_t length;
> +	ib_mad_addr_abi_5_t addr;
> +	uint8_t  data[0];
> +} ib_user_mad_abi_5_t;
> +
> +
>  /*************************************
>   * Port
>   */
> @@ -463,6 +489,101 @@ dev_to_umad_id(char *dev, uint port)
>  	return -1;	/* not found */
>  }
>  
> +static int
> +write_data(int fd, void *data, int size)
> +{
> +	int n;
> +
> +	n = write(fd, data, size);
> +	if (n != size) {
> +		DEBUG("write returned %d != sizeof mad data %d (%m)", n, size);
> +		if (!errno)
> +			errno = EIO;
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +write_abi_5(int fd, struct ib_user_mad *mad, int length)
> +{
> +	struct ib_user_mad_abi_5 *umad_5;
> +	int n;
> +
> +	n = sizeof *umad_5 + length;
> +	umad_5 = malloc(n);
> +	if (!umad_5) {
> +		errno = ENOMEM;
> +		return -ENOMEM;
> +	}
> +
> +	memcpy(umad_5, mad, sizeof *umad_5);
> +	memcpy(umad_5->data, mad->data, length);
> +
> +	n = write_data(fd, umad_5, n);
> +	free(umad_5);
> +	return n;
> +}
> +
> +static int
> +read_data(int fd, void *data, int size, int *length)
> +{
> +	struct ib_user_mad *mad = data;
> +	int n, umad_size;
> +
> +	umad_size = size - *length;
> +
> +	n = read(fd, data, size);
> +	if ((n >= 0) && (n <= size)) {
> +		DEBUG("mad received by agent %d length %d", mad->agent_id, n);
> +		if (n > umad_size)
> +			*length = n - umad_size;
> +		else
> +			*length = 0;
> +		return mad->agent_id;
> +	}
> +
> +	if (n == -EWOULDBLOCK) {
> +		if (!errno)
> +			errno = EWOULDBLOCK;
> +		return n;
> +	}
> +
> +	DEBUG("read returned %zu > sizeof mad %zu (%m)",
> +	      mad->length - umad_size, *length);
> +
> +	*length = mad->length - umad_size;
> +	if (!errno)
> +		errno = EIO;
> +	return -errno;
> +}
> +
> +static int
> +read_abi_5(int fd, void *umad, int *length)
> +{
> +	struct ib_user_mad *mad = umad;
> +	struct ib_user_mad_abi_5 *umad_5;
> +	int n;
> +
> +	n = sizeof *umad_5 + *length;
> +	umad_5 = malloc(n);
> +	if (!umad_5) {
> +		errno = EINVAL;
> +		return -EINVAL;
> +	}
> +
> +	n = read_data(fd, umad_5, n, length);
> +	if (n >= 0) {
> +		memcpy(mad, umad_5, sizeof *umad_5);
> +		mad->addr.pkey_index = 0;
> +		memcpy(mad->data, umad_5->data, *length);
> +	}
> +
> +	free(umad_5);
> +	return n;
> +}
> +
>  /*******************************
>   * Public interface
>   */
> @@ -470,17 +591,19 @@ dev_to_umad_id(char *dev, uint port)
>  int
>  umad_init(void)
>  {
> -	uint abi_version;
> -
>  	TRACE("umad_init");
>  	if (sys_read_uint(IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE, &abi_version) < 0) {
>  		IBWARN("can't read ABI version from %s/%s (%m): is ib_umad module loaded?",
>  			IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE);
>  		return -1;
>  	}
> -	if (abi_version != IB_UMAD_ABI_VERSION) {
> -		IBWARN("wrong ABI version: %s/%s is %d but library ABI is %d",
> -			IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE, abi_version, IB_UMAD_ABI_VERSION);
> +
> +	if (abi_version < IB_UMAD_MIN_ABI_VERSION ||
> +	    abi_version > IB_UMAD_MAX_ABI_VERSION) {
> +		IBWARN("wrong ABI version: %s/%s is %d but library ABI "
> +			"supports %d through %d",
> +			IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE, abi_version,
> +			IB_UMAD_MIN_ABI_VERSION, IB_UMAD_MAX_ABI_VERSION);
>  		return -1;
>  	}
>  	return 0;
> @@ -699,11 +822,16 @@ umad_set_grh(void *umad, void *mad_addr)
>  }
>  
>  int
> -umad_set_pkey(void *umad, int pkey)
> +umad_set_pkey(void *umad, int pkey_index)
>  {
> -#if 0
> -	mad->addr.pkey = 0;		/* FIXME - PKEY support */
> -#endif
> +	struct ib_user_mad *mad = umad;
> +
> +	if (abi_version == 5 && pkey_index != 0) {
> +		IBWARN("umad_set_pkey: ABI 5 only supports pkey_index 0\n");
> +		return -EINVAL;
> +	}
> +
> +	mad->addr.pkey_index = pkey_index;
>  	return 0;
>  }
>  
> @@ -761,15 +889,12 @@ umad_send(int portid, int agentid, void *umad, int length,
>  	if (umaddebug > 1)
>  		umad_dump(mad);
>  
> -	n = write(port->dev_fd, mad, length + sizeof *mad);
> -	if (n == length + sizeof *mad)
> -		return 0;
> +	if (abi_version == 5)
> +		n = write_abi_5(port->dev_fd, mad, length);
> +	else
> +		n = write_data(port->dev_fd, mad, sizeof *mad + length);
>  
> -	DEBUG("write returned %d != sizeof umad %zu + length %d (%m)",
> -	      n, sizeof *mad, length);
> -	if (!errno)
> -		errno = EIO;
> -	return -EIO;
> +	return n;
>  }
>  
>  static int
> @@ -793,7 +918,6 @@ dev_poll(int fd, int timeout_ms)
>  int
>  umad_recv(int portid, void *umad, int *length, int timeout_ms)
>  {
> -	struct ib_user_mad *mad = umad;
>  	Port *port;
>  	int n;
>  
> @@ -817,29 +941,13 @@ umad_recv(int portid, void *umad, int *length, int timeout_ms)
>  		return n;
>  	}
>  
> -	n = read(port->dev_fd, umad, sizeof *mad + *length);
> -	if ((n >= 0) && (n <= sizeof *mad + *length)) {
> -		DEBUG("mad received by agent %d length %d", mad->agent_id, n);
> -		if (n > sizeof *mad)
> -			*length = n - sizeof *mad;
> -		else
> -			*length = 0;
> -		return mad->agent_id;
> -	}
> -
> -	if (n == -EWOULDBLOCK) {
> -		if (!errno)
> -			errno = EWOULDBLOCK;
> -		return n;
> -	}
> -
> -	DEBUG("read returned %zu > sizeof umad %zu + length %d (%m)",
> -	      mad->length - sizeof *mad, sizeof *mad, *length);
> +	if (abi_version == 5)
> +		n = read_abi_5(port->dev_fd, umad, length);
> +	else
> +		n = read_data(port->dev_fd, umad,
> +			      sizeof(struct ib_user_mad) + *length, length);
>  
> -	*length = mad->length - sizeof *mad;
> -	if (!errno)
> -		errno = EIO;
> -	return -errno;
> +	return n;
>  }
>  
>  int
> @@ -996,10 +1104,10 @@ umad_addr_dump(ib_mad_addr_t *addr)
>  	gid_str[i*2] = 0;
>  	IBWARN("qpn %d qkey 0x%x lid 0x%x sl %d\n"
>  		"grh_present %d gid_index %d hop_limit %d traffic_class %d flow_label 0x%x\n"
> -		"Gid 0x%s",
> +		"Gid 0x%s pkey_index %d",
>  		ntohl(addr->qpn), ntohl(addr->qkey), ntohs(addr->lid), addr->sl,
>  		addr->grh_present, (int)addr->gid_index, (int)addr->hop_limit,
> -		(int)addr->traffic_class, addr->flow_label, gid_str);
> +		(int)addr->traffic_class, addr->flow_label, gid_str, addr->pkey_index);
>  }
>  
>  void
> 




More information about the general mailing list