[ofa-general] Re: [PATCH] libibcm: get correct ucm device name

Jeff Squyres jsquyres at cisco.com
Tue Jul 22 11:47:05 PDT 2008


Sean --

I have since rebooted my one node that got the ordering wrong upon  
startup, and now despite many reboots on all my nodes that have an  
NE020 and ConnectX HCA (3), I cannot get the ordering to be "wrong"  
again -- it always puts ConnectX first, so the original problem  
doesn't surface.  :-\

But FWIW, this patch seems to work fine for me in the scenarios where  
the IB device is first.

Thanks!


On Jul 22, 2008, at 3:12 AM, Sean Hefty wrote:

> Search for the ucm device name that corresponds to a given libibverbs
> device name.  When non-IB devices are present in the system, the ucm
> device name can differ from the uverbs device name, so we need to
> look up the correct name.
>
> Signed-off-by: Sean Hefty <sean.hefty at intel.com>
> ---
> Jeff, can you give this a try and see if it works on your systems?  I've
> tested the patch on my own system, but I only have 1 device in each of my
> systems.  (I'm limited by my hardware here.)  If this works, this change,
> and the other CM change to remove unneeded prints should both go into
> the next OFED release.
>
> diff --git a/src/cm.c b/src/cm.c
> index c0bcba3..571e28a 100644
> --- a/src/cm.c
> +++ b/src/cm.c
> @@ -67,6 +67,10 @@
> static int abi_ver;
> static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
>
> +enum {
> +	IB_UCM_MAX_DEVICES = 32
> +};
> +
> #define CM_CREATE_MSG_CMD_RESP(msg, cmd, resp, type, size) \
> do {                                        \
> 	struct cm_abi_cmd_hdr *hdr;         \
> @@ -147,35 +151,64 @@ static int ucm_init(void)
> 	return ret;
> }
>
> +static int ucm_get_dev_index(char *dev_name)
> +{
> +	char *dev_path;
> +	char ibdev[IBV_SYSFS_NAME_MAX];
> +	int i, ret;
> +
> +	for (i = 0; i < IB_UCM_MAX_DEVICES; i++) {
> +		ret = asprintf(&dev_path, "/sys/class/infiniband_cm/ucm%d", i);
> +		if (ret < 0)
> +			return -1;
> +
> +		ret = ibv_read_sysfs_file(dev_path, "ibdev", ibdev, sizeof ibdev);
> +		if (ret < 0)
> +			continue;
> +
> +		if (!strcmp(dev_name, ibdev)) {
> +			free(dev_path);
> +			return i;
> +		}
> +
> +		free(dev_path);
> +	}
> +	return -1;
> +}
> +
> struct ib_cm_device* ib_cm_open_device(struct ibv_context  
> *device_context)
> {
> 	struct ib_cm_device *dev;
> 	char *dev_path;
> +	int index, ret;
>
> 	if (ucm_init())
> 		return NULL;
>
> +	index = ucm_get_dev_index(device_context->device->name);
> +	if (index < 0)
> +		return NULL;
> +
> 	dev = malloc(sizeof *dev);
> 	if (!dev)
> 		return NULL;
>
> 	dev->device_context = device_context;
>
> -	if (asprintf(&dev_path, "/dev/infiniband/ucm%s",
> -		 device_context->device->dev_name + sizeof("uverbs") - 1) < 0)
> -		goto err2;
> +	ret = asprintf(&dev_path, "/dev/infiniband/ucm%d", index);
> +	if (ret < 0)
> +		goto err1;
>
> 	dev->fd = open(dev_path, O_RDWR);
> -	if (dev->fd < 0) {
> -		goto err;
> -	}
> +	if (dev->fd < 0)
> +		goto err2;
>
> 	free(dev_path);
> 	return dev;
>
> -err:
> -	free(dev_path);
> err2:
> +	free(dev_path);
> +err1:
> 	free(dev);
> 	return NULL;
> }
>
>
>


-- 
Jeff Squyres
Cisco Systems




More information about the general mailing list