[ofa-general] Re: [PATCH] ibsim: Allow multi-threaded use of ibsim

sebastien dugue sebastien.dugue at bull.net
Tue Jul 28 00:53:30 PDT 2009


  Hi Sasha,

On Mon, 27 Jul 2009 18:47:22 +0300
Sasha Khapyorsky <sashak at voltaire.com> wrote:

> On 13:38 Mon 27 Jul     , sebastien dugue wrote:
> > 
> >   Right now, the socket name used between the client and ibsim is appended
> > with the client's pid. However this does not work when several threads with
> > the same pid want to register their clients.
> 
> And strictly speaking umad2sim by itself is not thread safe. So enabling
> a multi-threaded clients you need to keep this in a mind, may be to add
> some notes about this to README.

  Well in fact I poorly chose the subject for this mail. It should have read:

  'Allow multiple agents per process'

  There's nothing that needs to be thread safe in there. The thing is, if you
want multiple threads from a single process to register their own agents, then
the agents cannot be attached to the umad2sim_dev instance, but rather to a
file instance opened on that device. That way each thread that wants to register
an agent does it's own umad_open_port().


> 
> >   instead of the pid, use a combination of the thread pid along with a
> > connection number incremented for each new client.
> > 
> >   Also, attach the agents to the file instance opened on the device rather
> > than on the device itself.
> 
> Would be nice to have this as separate patch. 

  No, it's all the same thing. Sorry for the confusion.

  More below...

> 
> > 
> > Signed-off-by: Sebastien Dugue <sebastien.dugue at bull.net>
> > ---
> >  umad2sim/sim_client.c |   11 ++-
> >  umad2sim/umad2sim.c   |  171 +++++++++++++++++++++++++++++++++++++-----------
> >  2 files changed, 140 insertions(+), 42 deletions(-)
> > 
> > diff --git a/umad2sim/sim_client.c b/umad2sim/sim_client.c
> > index eb42a7c..22a50a2 100644
> > --- a/umad2sim/sim_client.c
> > +++ b/umad2sim/sim_client.c
> > @@ -209,6 +209,8 @@ static int sim_init(struct sim_client *sc, char *nodeid)
> >  	char *connect_port;
> >  	char *connect_host;
> >  	unsigned short port;
> > +	static int conn_num = 0;
> > +	int conn_id;
> >  
> >  	connect_port = getenv("IBSIM_SERVER_PORT");
> >  	connect_host = getenv("IBSIM_SERVER_NAME");
> > @@ -228,7 +230,10 @@ static int sim_init(struct sim_client *sc, char *nodeid)
> >  	if ((ctlfd = socket(remote_mode ? PF_INET : PF_LOCAL, SOCK_DGRAM, 0)) < 0)
> >  		IBPANIC("can't get socket (ctlfd)");
> >  
> > -	size = make_name(&name, NULL, 0, "%s:ctl%d", socket_basename, pid);
> > +	conn_id = ((conn_num & 0xff) << 24) | (pid & 0xffffff);
> > +	conn_num++;
> 
> BTW why to not use pid/tid combination? This would eliminate the needs in
> yet another static (and thread unsafe) variable.

  That was my first thought, but gettid() is pretty much Linux specific so I
tried another approach. I agree though that conn_num and it's use should be
protected somehow. Will add a spinlock for that.

> 
> > +
> > +	size = make_name(&name, NULL, 0, "%s:ctl%d", socket_basename, conn_id);
> >  
> >  	if (bind(ctlfd, (struct sockaddr *)&name, size) < 0)
> >  		IBPANIC("can't bind ctl socket");
> > @@ -243,7 +248,7 @@ static int sim_init(struct sim_client *sc, char *nodeid)
> >  
> >  	sc->fd_ctl = ctlfd;
> >  
> > -	size = make_name(&name, NULL, 0, "%s:in%d", socket_basename, pid);
> > +	size = make_name(&name, NULL, 0, "%s:in%d", socket_basename, conn_id);
> >  
> >  	if (bind(fd, (struct sockaddr *)&name, size) < 0)
> >  		IBPANIC("can't bind input socket");
> > @@ -254,7 +259,7 @@ static int sim_init(struct sim_client *sc, char *nodeid)
> >  		IBPANIC("can't read data from bound socket");
> >  	port = ntohs(name.name_i.sin_port);
> >  
> > -	sc->clientid = sim_connect(sc, remote_mode ? port : pid, 0, nodeid);
> > +	sc->clientid = sim_connect(sc, remote_mode ? port : conn_id, 0, nodeid);
> >  	if (sc->clientid < 0)
> >  		IBPANIC("connect failed");
> >  
> > diff --git a/umad2sim/umad2sim.c b/umad2sim/umad2sim.c
> > index 55440ec..71f7c67 100644
> > --- a/umad2sim/umad2sim.c
> > +++ b/umad2sim/umad2sim.c
> > @@ -81,12 +81,17 @@ struct umad2sim_dev {
> >  	char name[32];
> >  	uint8_t port;
> >  	struct sim_client sim_client;
> 
> Why do we need sim_client field here if it is going to umad2sim_devfile
> structure?

  We need it here too in umad2sim_dev_create() and children to create the
sysfs tree.

> 
> > -	unsigned agent_idx[256];
> > -	struct ib_user_mad_reg_req agents[32];
> >  	char umad_path[256];
> >  	char issm_path[256];
> >  };
> >  
> > +struct umad2sim_devfile {
> > +	struct umad2sim_dev *dev;
> > +	struct sim_client sim_client;
> > +	struct ib_user_mad_reg_req agents[32];
> > +	unsigned agent_idx[256];
> > +};
> > +
> >  static int (*real_open) (const char *path, int flags, ...);
> >  static int (*real_close) (int fd);
> >  static ssize_t(*real_read) (int fd, void *buf, size_t count);
> > @@ -113,6 +118,7 @@ static char umad2sim_sysfs_prefix[32];
> >  
> >  static unsigned umad2sim_initialized;
> >  static struct umad2sim_dev *devices[32];
> > +static struct umad2sim_devfile *devfiles[1024];
> >  
> >  /*
> >   *  sysfs stuff
> > @@ -378,9 +384,69 @@ static int dev_sysfs_create(struct umad2sim_dev *dev)
> >   * umad2sim device
> >   *
> >   */
> > +static int umad2sim_open(struct umad2sim_dev *dev)
> > +{
> > +	int i;
> > +	int fd;
> > +	struct umad2sim_devfile *file;
> > +
> > +	/* Find unused fd */
> > +	for (fd = 0; fd < 1024; fd++)
> 
> arrsize() is more suitable here (and in another similar places).

  Right, will change those.

> 
> > +		if (devfiles[fd] == NULL)
> > +			break;
> > +
> > +	if (fd == 1024) {
> > +		ERROR("umad2sim_open: No more available files\n");
> > +		errno = EMFILE;
> > +		return -1;
> > +	}
> >  
> > -static ssize_t umad2sim_read(struct umad2sim_dev *dev, void *buf, size_t count)
> > +	if ((file = calloc(1, sizeof(struct umad2sim_devfile))) == NULL) {
> > +		ERROR("umad2sim_open: Out of memory\n");
> > +		errno = ENOMEM;
> > +		return -1;
> > +	}
> > +
> > +	file->dev = dev;
> > +
> > +	for (i = 0; i < arrsize(file->agents); i++)
> > +		file->agents[i].id = (uint32_t)(-1);
> > +
> > +	for (i = 0; i < arrsize(file->agent_idx); i++)
> > +		file->agent_idx[i] = (unsigned)(-1);
> > +
> > +	if (sim_client_init(&file->sim_client) < 0) {
> > +		free(file);
> > +		return -1;
> > +	}
> > +
> > +	devfiles[fd] = file;
> > +
> > +	return 1024 + fd;
> > +}
> > +
> > +static int umad2sim_close(int fd)
> >  {
> > +	struct umad2sim_devfile *file;
> > +
> > +	if ((file = devfiles[fd - 1024]) == NULL) {
> > +		errno = EBADF;
> > +		ERROR("umad2sim_close: no such file\n");
> > +		return -1;
> > +	}
> > +
> > +	sim_client_exit(&file->sim_client);
> > +
> > +	free(file);
> > +
> > +	devfiles[fd - 1024] = NULL;
> > +
> > +	return 0;
> > +}
> > +
> > +static ssize_t umad2sim_read(int fd, void *buf, size_t count)
> > +{
> > +	struct umad2sim_devfile *file;
> >  	struct sim_request req;
> >  	ib_user_mad_t *umad = (ib_user_mad_t *) buf;
> >  	unsigned mgmt_class;
> > @@ -388,7 +454,13 @@ static ssize_t umad2sim_read(struct umad2sim_dev *dev, void *buf, size_t count)
> >  
> >  	DEBUG("umad2sim_read: %zu...\n", count);
> >  
> > -	cnt = real_read(dev->sim_client.fd_pktin, &req, sizeof(req));
> > +	if ((file = devfiles[fd - 1024]) == NULL) {
> > +		errno = EBADF;
> > +		ERROR("umad2sim_read: no such file\n");
> > +		return -1;
> > +	}
> > +
> > +	cnt = real_read(file->sim_client.fd_pktin, &req, sizeof(req));
> >  	DEBUG("umad2sim_read: got %d...\n", cnt);
> >  	if (cnt < sizeof(req)) {
> >  		ERROR("umad2sim_read: partial request - skip.\n");
> > @@ -406,7 +478,7 @@ static ssize_t umad2sim_read(struct umad2sim_dev *dev, void *buf, size_t count)
> >  	      mad_get_field(req.mad, 0, IB_MAD_ATTRID_F),
> >  	      mad_get_field(req.mad, 0, IB_MAD_ATTRMOD_F));
> >  
> > -	if (mgmt_class >= arrsize(dev->agent_idx)) {
> > +	if (mgmt_class >= arrsize(file->agent_idx)) {
> >  		ERROR("bad mgmt_class 0x%x\n", mgmt_class);
> >  		mgmt_class = 0;
> >  	}
> > @@ -415,7 +487,7 @@ static ssize_t umad2sim_read(struct umad2sim_dev *dev, void *buf, size_t count)
> >  		uint64_t trid = mad_get_field64(req.mad, 0, IB_MAD_TRID_F);
> >  		umad->agent_id = (trid >> 32) & 0xffff;
> >  	} else
> > -		umad->agent_id = dev->agent_idx[mgmt_class];
> > +		umad->agent_id = file->agent_idx[mgmt_class];
> >  
> >  	umad->status = ntohl(req.status);
> >  	umad->timeout_ms = 0;
> > @@ -437,9 +509,9 @@ static ssize_t umad2sim_read(struct umad2sim_dev *dev, void *buf, size_t count)
> >  	return umad->length;
> >  }
> >  
> > -static ssize_t umad2sim_write(struct umad2sim_dev *dev,
> > -			      const void *buf, size_t count)
> > +static ssize_t umad2sim_write(int fd, const void *buf, size_t count)
> >  {
> > +	struct umad2sim_devfile *file;
> >  	struct sim_request req;
> >  	ib_user_mad_t *umad = (ib_user_mad_t *) buf;
> >  	int cnt;
> > @@ -457,12 +529,18 @@ static ssize_t umad2sim_write(struct umad2sim_dev *dev,
> >  
> >  	DEBUG("umad2sim_write: %zu...\n", count);
> >  
> > +	if ((file = devfiles[fd - 1024]) == NULL) {
> > +		errno = EBADF;
> > +		ERROR("umad2sim_write: no such file\n");
> > +		return -1;
> > +	}
> > +
> >  	DEBUG("umad2sim_write: umad: agent_id=%u, retries=%u, "
> >  	      "agent.class=%x, agent.qpn=%u, "
> >  	      "addr.qpn=%u, addr.lid=%u\n",
> >  	      umad->agent_id, umad->retries,
> > -	      dev->agents[umad->agent_id].mgmt_class,
> > -	      dev->agents[umad->agent_id].qpn,
> > +	      file->agents[umad->agent_id].mgmt_class,
> > +	      file->agents[umad->agent_id].qpn,
> >  	      htonl(umad->addr.qpn), htons(umad->addr.lid));
> >  	DEBUG("umad2sim_write: mad: method=%x, response=%x, mgmtclass=%x, "
> >  	      "attrid=%x, attrmod=%x\n",
> > @@ -476,7 +554,7 @@ static ssize_t umad2sim_write(struct umad2sim_dev *dev,
> >  	req.slid = req.dlid == 0xffff ? 0xffff : 0;	/* 0 - means auto
> >  							   (supported by ibsim) */ ;
> >  	req.dqp = umad->addr.qpn;
> > -	req.sqp = htonl(dev->agents[umad->agent_id].qpn);
> > +	req.sqp = htonl(file->agents[umad->agent_id].qpn);
> >  	req.status = 0;
> >  
> >  	cnt = count - umad_size();
> > @@ -492,7 +570,7 @@ static ssize_t umad2sim_write(struct umad2sim_dev *dev,
> >  		mad_set_field64(req.mad, 0, IB_MAD_TRID_F, trid);
> >  	}
> >  
> > -	cnt = write(dev->sim_client.fd_pktout, (void *)&req, sizeof(req));
> > +	cnt = write(file->sim_client.fd_pktout, (void *)&req, sizeof(req));
> >  	if (cnt < 0) {
> >  		ERROR("umad2sim_write: cannot write\n");
> >  		return -1;
> > @@ -503,19 +581,21 @@ static ssize_t umad2sim_write(struct umad2sim_dev *dev,
> >  	return count;
> >  }
> >  
> > -static int register_agent(struct umad2sim_dev *dev,
> > +static int register_agent(struct umad2sim_devfile *file,
> >  			  struct ib_user_mad_reg_req *req)
> >  {
> > -	unsigned i;
> > +	unsigned i;		
> > +
> >  	DEBUG("register_agent: id = %u, qpn = %u, mgmt_class = %u,"
> >  	      " mgmt_class_version = %u, rmpp_version = %u\n",
> >  	      req->id, req->qpn, req->mgmt_class, req->mgmt_class_version,
> >  	      req->rmpp_version);
> > -	for (i = 0; i < arrsize(dev->agents); i++)
> > -		if (dev->agents[i].id == (uint32_t)(-1)) {
> > +
> > +	for (i = 0; i < arrsize(file->agents); i++)
> > +		if (file->agents[i].id == (uint32_t)(-1)) {
> >  			req->id = i;
> > -			dev->agents[i] = *req;
> > -			dev->agent_idx[req->mgmt_class] = i;
> > +			file->agents[i] = *req;
> > +			file->agent_idx[req->mgmt_class] = i;
> >  			DEBUG("agent registered: %d\n", i);
> >  			return 0;
> >  		}
> > @@ -523,28 +603,36 @@ static int register_agent(struct umad2sim_dev *dev,
> >  	return -1;
> >  }
> >  
> > -static int unregister_agent(struct umad2sim_dev *dev, unsigned id)
> > +static int unregister_agent(struct umad2sim_devfile *file, unsigned id)
> >  {
> >  	unsigned mgmt_class;
> > -	if (id >= arrsize(dev->agents)) {
> > +	if (id >= arrsize(file->agents)) {
> >  		errno = EINVAL;
> >  		return -1;
> >  	}
> > -	mgmt_class = dev->agents[id].mgmt_class;
> > -	dev->agents[id].id = (uint32_t)(-1);
> > -	dev->agent_idx[mgmt_class] = -1;
> > +	mgmt_class = file->agents[id].mgmt_class;
> > +	file->agents[id].id = (uint32_t)(-1);
> > +	file->agent_idx[mgmt_class] = -1;
> >  	return 0;
> >  }
> >  
> > -static int umad2sim_ioctl(struct umad2sim_dev *dev, unsigned long request,
> > -			  void *arg)
> > +static int umad2sim_ioctl(int fd, unsigned long request, void *arg)
> >  {
> > +	struct umad2sim_devfile *file;
> > +
> >  	DEBUG("umad2sim_ioctl: %lu, %p...\n", request, arg);
> > +
> > +	if ((file = devfiles[fd - 1024]) == NULL) {
> > +		errno = EBADF;
> > +		ERROR("umad2sim_ioctl: no such file\n");
> > +		return -1;
> > +	}
> > +
> >  	switch (request) {
> >  	case IB_USER_MAD_REGISTER_AGENT:
> > -		return register_agent(dev, arg);
> > +		return register_agent(file, arg);
> >  	case IB_USER_MAD_UNREGISTER_AGENT:
> > -		return unregister_agent(dev, *((unsigned *)arg));
> > +		return unregister_agent(file, *((unsigned *)arg));
> >  	case IB_USER_MAD_ENABLE_PKEY:
> >  		return 0;
> >  	default:
> > @@ -556,7 +644,6 @@ static int umad2sim_ioctl(struct umad2sim_dev *dev, unsigned long request,
> >  static struct umad2sim_dev *umad2sim_dev_create(unsigned num, const char *name)
> >  {
> >  	struct umad2sim_dev *dev;
> > -	unsigned i;
> >  
> >  	DEBUG("umad2sim_dev_create: %s...\n", name);
> >  
> > @@ -573,10 +660,6 @@ static struct umad2sim_dev *umad2sim_dev_create(unsigned num, const char *name)
> >  
> >  	dev->port = mad_get_field(&dev->sim_client.portinfo, 0,
> >  				  IB_PORT_LOCAL_PORT_F);
> > -	for (i = 0; i < arrsize(dev->agents); i++)
> > -		dev->agents[i].id = (uint32_t)(-1);
> > -	for (i = 0; i < arrsize(dev->agent_idx); i++)
> > -		dev->agent_idx[i] = (unsigned)(-1);
> >  
> >  	dev_sysfs_create(dev);
> >  
> > @@ -637,6 +720,11 @@ static void umad2sim_cleanup(void)
> >  	char path[1024];
> >  	unsigned i;
> >  	DEBUG("umad2sim_cleanup...\n");
> > +
> > +	for (i = 0; i < 1024; i++)
> > +		if (devfiles[i])
> > +			free(devfiles[i]);
> 
> And what about sim_client disconnection? umad2sim_dev_delete() did this.

  This is now handled in umad2sim_close() called from close(). close() did
nothing previously.

  Hope this shed some light in the intents of the patch.

  Thanks for your comments,

  Sebastien.

> 
> Sasha
> 
> > +
> >  	for (i = 0; i < arrsize(devices); i++)
> >  		if (devices[i]) {
> >  			umad2sim_dev_delete(devices[i]);
> > @@ -756,7 +844,7 @@ int open(const char *path, int flags, ...)
> >  		if (!(dev = devices[i]))
> >  			continue;
> >  		if (!strncmp(path, dev->umad_path, sizeof(dev->umad_path))) {
> > -			return 1024 + i;
> > +			return umad2sim_open(dev);
> >  		}
> >  		if (!strncmp(path, dev->issm_path, sizeof(dev->issm_path))) {
> >  			sim_client_set_sm(&dev->sim_client, 1);
> > @@ -779,7 +867,7 @@ int close(int fd)
> >  		sim_client_set_sm(&dev->sim_client, 0);
> >  		return 0;
> >  	} else if (fd >= 1024) {
> > -		return 0;
> > +		return umad2sim_close(fd);
> >  	} else
> >  		return real_close(fd);
> >  }
> > @@ -791,7 +879,7 @@ ssize_t read(int fd, void *buf, size_t count)
> >  	if (fd >= 2048)
> >  		return -1;
> >  	else if (fd >= 1024)
> > -		return umad2sim_read(devices[fd - 1024], buf, count);
> > +		return umad2sim_read(fd, buf, count);
> >  	else
> >  		return real_read(fd, buf, count);
> >  }
> > @@ -803,7 +891,7 @@ ssize_t write(int fd, const void *buf, size_t count)
> >  	if (fd >= 2048)
> >  		return -1;
> >  	else if (fd >= 1024)
> > -		return umad2sim_write(devices[fd - 1024], buf, count);
> > +		return umad2sim_write(fd, buf, count);
> >  	else
> >  		return real_write(fd, buf, count);
> >  }
> > @@ -821,7 +909,7 @@ int ioctl(int fd, unsigned long request, ...)
> >  	if (fd >= 2048)
> >  		return -1;
> >  	else if (fd >= 1024)
> > -		return umad2sim_ioctl(devices[fd - 1024], request, arg);
> > +		return umad2sim_ioctl(fd, request, arg);
> >  	else
> >  		return real_ioctl(fd, request, arg);
> >  }
> > @@ -836,9 +924,14 @@ int poll(struct pollfd *pfds, nfds_t nfds, int timeout)
> >  
> >  	for (i = 0; i < nfds; i++) {
> >  		if (pfds[i].fd >= 1024 && pfds[i].fd < 2048) {
> > -			struct umad2sim_dev *dev = devices[pfds[i].fd - 1024];
> > +			struct umad2sim_devfile *file;
> > +
> > +			if ((file = devfiles[pfds[i].fd - 1024]) == NULL) {
> > +				errno = EBADF;
> > +				return -1;
> > +			}
> >  			saved_fds[i] = pfds[i].fd;
> > -			pfds[i].fd = dev->sim_client.fd_pktin;
> > +			pfds[i].fd = file->sim_client.fd_pktin;
> >  		} else
> >  			saved_fds[i] = 0;
> >  	}
> > -- 
> > 1.6.3.1
> > 
> 



More information about the general mailing list