[openib-general] [PATCH] opensm: libibmad: rpc API which supports more than one port.

Hal Rosenstock halr at voltaire.com
Wed Aug 30 09:13:41 PDT 2006


Hi Sasha,

On Tue, 2006-08-29 at 21:29, Sasha Khapyorsky wrote:
> Hi Hal,
> 
> On 20:09 Tue 29 Aug     , Hal Rosenstock wrote:
> > Hi Sasha,
> > 
> > On Fri, 2006-08-25 at 09:17, Sasha Khapyorsky wrote:
> > > This provides RPC like API which may work with several ports.
> > 
> > I think you mean "can work" rather than "may work" :-)
> 
> Yes.
> 
> We will still have one limitation from libumad - it tracks already-open
> ports. I'm not sure why (the same port can be opened from another
> process or by forking the current one). I think this may be the next
> improvement there.

OK.
 
> > > Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
> > > ---
> > > 
> > >  libibmad/include/infiniband/mad.h |    9 +++
> > >  libibmad/src/libibmad.map         |    4 +
> > >  libibmad/src/register.c           |   20 +++++--
> > >  libibmad/src/rpc.c                |  106 +++++++++++++++++++++++++++++++++++--
> > >  libibumad/src/umad.c              |    4 +
> > 
> > ../doc/libibmad.txt should also be updated appropriately for the new
> > routines.
> 
> Sure, I thought to stabilize this API first.

OK.
 
> > >  5 files changed, 130 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/libibmad/include/infiniband/mad.h b/libibmad/include/infiniband/mad.h
> > > index 45ff572..bd8a80b 100644
> > > --- a/libibmad/include/infiniband/mad.h
> > > +++ b/libibmad/include/infiniband/mad.h
> > > @@ -660,6 +660,7 @@ uint64_t mad_trid(void);
> > >  int	mad_build_pkt(void *umad, ib_rpc_t *rpc, ib_portid_t *dport, ib_rmpp_hdr_t *rmpp, void *data);
> > >  
> > >  /* register.c */
> > > +int	mad_register_port_client(int port_id, int mgmt, uint8_t rmpp_version);
> > >  int	mad_register_client(int mgmt, uint8_t rmpp_version);
> > >  int	mad_register_server(int mgmt, uint8_t rmpp_version,
> > >  			    uint32_t method_mask[4], uint32_t class_oui);
> > > @@ -704,6 +705,14 @@ void	madrpc_lock(void);
> > >  void	madrpc_unlock(void);
> > >  void	madrpc_show_errors(int set);
> > >  
> > > +void *	mad_rpc_open_port(char *dev_name, int dev_port, int *mgmt_classes,
> > > +			  int num_classes);
> > > +void	mad_rpc_close_port(void *ibmad_port);
> > > +void *	mad_rpc(void *ibmad_port, ib_rpc_t *rpc, ib_portid_t *dport,
> > > +		void *payload, void *rcvdata);
> > > +void *  mad_rpc_rmpp(void *ibmad_port, ib_rpc_t *rpc, ib_portid_t *dport,
> > > +		     ib_rmpp_hdr_t *rmpp, void *data);
> > > +
> > >  /* smp.c */
> > >  uint8_t * smp_query(void *buf, ib_portid_t *id, uint attrid, uint mod,
> > >  		    uint timeout);
> > > diff --git a/libibmad/src/libibmad.map b/libibmad/src/libibmad.map
> > > index bf81bd1..78b7ff0 100644
> > > --- a/libibmad/src/libibmad.map
> > > +++ b/libibmad/src/libibmad.map
> > > @@ -62,6 +62,10 @@ IBMAD_1.0 {
> > 
> > This should be 1.1
> 
> Ok.
> 
> > 
> > >  		ib_resolve_self;
> > >  		ib_resolve_smlid;
> > >  		ibdebug;
> > > +		mad_rpc_open_port;
> > > +		mad_rpc_close_port;
> > > +		mad_rpc;
> > > +		mad_rpc_rmpp;
> > >  		madrpc;
> > >  		madrpc_def_timeout;
> > >  		madrpc_init;
> > 
> > What about mad_register_port_client ? Should that be included here ?
> 
> It is not used externally - all registrations are done in _open(). So I
> don't see this as part of the new "API". Maybe if we decide to
> extend it later we will need to "export" this symbol.

OK.
 
> > > diff --git a/libibmad/src/register.c b/libibmad/src/register.c
> > > index 4f44625..52d6989 100644
> > > --- a/libibmad/src/register.c
> > > +++ b/libibmad/src/register.c
> > > @@ -43,6 +43,7 @@ #include <unistd.h>
> > >  #include <pthread.h>
> > >  #include <sys/time.h>
> > >  #include <string.h>
> > > +#include <errno.h>
> > >  
> > >  #include <umad.h>
> > >  #include "mad.h"
> > > @@ -118,7 +119,7 @@ mad_agent_class(int agent)
> > >  }
> > >  
> > >  int
> > > -mad_register_client(int mgmt, uint8_t rmpp_version)
> > > +mad_register_port_client(int port_id, int mgmt, uint8_t rmpp_version)
> > >  {
> > >  	int vers, agent;
> > >  
> > > @@ -126,7 +127,7 @@ mad_register_client(int mgmt, uint8_t rm
> > >  		DEBUG("Unknown class %d mgmt_class", mgmt);
> > >  		return -1;
> > >  	}
> > > -	if ((agent = umad_register(madrpc_portid(), mgmt,
> > > +	if ((agent = umad_register(port_id, mgmt,
> > >  				   vers, rmpp_version, 0)) < 0) {
> > >  		DEBUG("Can't register agent for class %d", mgmt);
> > >  		return -1;
> > > @@ -137,13 +138,22 @@ mad_register_client(int mgmt, uint8_t rm
> > >  		return -1;
> > >  	}
> > >  
> > > -	if (register_agent(agent, mgmt) < 0)
> > > -		return -1;
> > > -
> > >  	return agent;
> > >  }
> > >  
> > >  int
> > > +mad_register_client(int mgmt, uint8_t rmpp_version)
> > > +{
> > > +	int agent;
> > > +
> > > +	agent = mad_register_port_client(madrpc_portid(), mgmt, rmpp_version);
> > > +	if (agent < 0)
> > > +		return agent;
> > > +
> > > +	return register_agent(agent, mgmt);
> > > +}
> > > +
> > > +int
> > >  mad_register_server(int mgmt, uint8_t rmpp_version,
> > >  		    uint32_t method_mask[4], uint32_t class_oui)
> > >  {
> > > diff --git a/libibmad/src/rpc.c b/libibmad/src/rpc.c
> > > index b2d3e77..ac4f361 100644
> > > --- a/libibmad/src/rpc.c
> > > +++ b/libibmad/src/rpc.c
> > > @@ -48,6 +48,13 @@ #include <errno.h>
> > >  #include <umad.h>
> > >  #include "mad.h"
> > >  
> > > +#define MAX_CLASS 256
> > > +
> > > +struct ibmad_port {
> > > +	int port_id;  /* file descriptor returned by umad_open() */
> > > +	int class_agents[MAX_CLASS]; /* class2agent mapper */
> > > +};
> > > +
> > >  int ibdebug;
> > >  
> > >  static int mad_portid = -1;
> > > @@ -105,7 +112,8 @@ madrpc_portid(void)
> > >  }
> > >  
> > >  static int 
> > > -_do_madrpc(void *sndbuf, void *rcvbuf, int agentid, int len, int timeout)
> > > +_do_madrpc(int port_id, void *sndbuf, void *rcvbuf, int agentid, int len,
> > > +	   int timeout)
> > >  {
> > >  	uint32_t trid; /* only low 32 bits */
> > >  	int retries;
> > > @@ -133,7 +141,7 @@ _do_madrpc(void *sndbuf, void *rcvbuf, i
> > >  		}
> > >  
> > >  		length = len;
> > > -		if (umad_send(mad_portid, agentid, sndbuf, length, timeout, 0) < 0) {
> > > +		if (umad_send(port_id, agentid, sndbuf, length, timeout, 0) < 0) {
> > >  			IBWARN("send failed; %m");
> > >  			return -1;
> > >  		}
> > > @@ -141,7 +149,7 @@ _do_madrpc(void *sndbuf, void *rcvbuf, i
> > >  		/* Use same timeout on receive side just in case */
> > >  		/* send packet is lost somewhere. */
> > >  		do {
> > > -			if (umad_recv(mad_portid, rcvbuf, &length, timeout) < 0) {
> > > +			if (umad_recv(port_id, rcvbuf, &length, timeout) < 0) {
> > >  				IBWARN("recv failed: %m");
> > >  				return -1;
> > >  			}
> > > @@ -164,8 +172,10 @@ _do_madrpc(void *sndbuf, void *rcvbuf, i
> > >  }
> > >  
> > >  void *
> > > -madrpc(ib_rpc_t *rpc, ib_portid_t *dport, void *payload, void *rcvdata)
> > > +mad_rpc(void *port_id, ib_rpc_t *rpc, ib_portid_t *dport, void *payload,
> > > +	void *rcvdata)
> > >  {
> > > +	struct ibmad_port *p = port_id;
> > >  	int status, len;
> > >  	uint8_t sndbuf[1024], rcvbuf[1024], *mad;
> > >  
> > > @@ -175,7 +185,8 @@ madrpc(ib_rpc_t *rpc, ib_portid_t *dport
> > >  	if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
> > >  		return 0;
> > >  
> > > -	if ((len = _do_madrpc(sndbuf, rcvbuf, mad_class_agent(rpc->mgtclass),
> > > +	if ((len = _do_madrpc(p->port_id, sndbuf, rcvbuf,
> > > +			      p->class_agents[rpc->mgtclass],
> > >  			      len, rpc->timeout)) < 0)
> > >  		return 0;
> > >  
> > > @@ -198,8 +209,10 @@ madrpc(ib_rpc_t *rpc, ib_portid_t *dport
> > >  }
> > >  
> > >  void *
> > > -madrpc_rmpp(ib_rpc_t *rpc, ib_portid_t *dport, ib_rmpp_hdr_t *rmpp, void *data)
> > > +mad_rpc_rmpp(void *port_id, ib_rpc_t *rpc, ib_portid_t *dport,
> > > +	     ib_rmpp_hdr_t *rmpp, void *data)
> > >  {
> > > +	struct ibmad_port *p = port_id;
> > >  	int status, len;
> > >  	uint8_t sndbuf[1024], rcvbuf[1024], *mad;
> > >  
> > > @@ -210,7 +223,8 @@ madrpc_rmpp(ib_rpc_t *rpc, ib_portid_t *
> > >  	if ((len = mad_build_pkt(sndbuf, rpc, dport, rmpp, data)) < 0)
> > >  		return 0;
> > >  
> > > -	if ((len = _do_madrpc(sndbuf, rcvbuf, mad_class_agent(rpc->mgtclass),
> > > +	if ((len = _do_madrpc(p->port_id, sndbuf, rcvbuf,
> > > +			      p->class_agents[rpc->mgtclass],
> > >  			      len, rpc->timeout)) < 0)
> > >  		return 0;
> > >  
> > > @@ -249,6 +263,24 @@ madrpc_rmpp(ib_rpc_t *rpc, ib_portid_t *
> > >  	return data;
> > >  }
> > >  
> > > +void *
> > > +madrpc(ib_rpc_t *rpc, ib_portid_t *dport, void *payload, void *rcvdata)
> > > +{
> > > +	struct ibmad_port port;
> > > +	port.port_id = mad_portid;
> > > +	port.class_agents[rpc->mgtclass] = mad_class_agent(rpc->mgtclass);
> > > +	return mad_rpc(&port, rpc, dport, payload, rcvdata);
> > > +}
> > > +
> > > +void *
> > > +madrpc_rmpp(ib_rpc_t *rpc, ib_portid_t *dport, ib_rmpp_hdr_t *rmpp, void *data)
> > > +{
> > > +	struct ibmad_port port;
> > > +	port.port_id = mad_portid;
> > > +	port.class_agents[rpc->mgtclass] = mad_class_agent(rpc->mgtclass);
> > > +	return mad_rpc_rmpp(&port, rpc, dport, rmpp, data);
> > > +}
> > > +
> > >  static pthread_mutex_t rpclock = PTHREAD_MUTEX_INITIALIZER;
> > >  
> > >  void
> > > @@ -282,3 +314,63 @@ madrpc_init(char *dev_name, int dev_port
> > >  			IBPANIC("client_register for mgmt %d failed", mgmt);
> > >  	}
> > >  }
> > > +
> > > +void *
> > > +mad_rpc_open_port(char *dev_name, int dev_port,
> > > +		  int *mgmt_classes, int num_classes)
> > > +{
> > > +	struct ibmad_port *p;
> > > +	int port_id;
> > 
> > Should there be some validation on num_classes < MAX_CLASS ?
> 
> Such a check is cheap and can be performed (it was not done in
> madrpc_init()).

I guess that validation is needed in both places. I'll add it after
this patch.

> Without this the function will still "work" (i.e. fail), but in a longer way
> (it will fail to register an agent once MAX_CLASS is exceeded).

Won't it overwrite some structure (scribble on memory) ?
 
> > > +	if (umad_init() < 0) {
> > > +		IBWARN("can't init UMAD library");
> > > +		errno = ENODEV;
> > > +		return NULL;
> > > +	}
> > > +
> > > +	p = malloc(sizeof(*p));
> > > +	if (!p) {
> > > +		errno = ENOMEM;
> > > +		return NULL;
> > > +	}
> > > +	memset(p, 0, sizeof(*p));
> > > +
> > > +	if ((port_id = umad_open_port(dev_name, dev_port)) < 0) {
> > > +		IBWARN("can't open UMAD port (%s:%d)", dev_name, dev_port);
> > > +		if (!errno)
> > > +			errno = EIO;
> > > +		free(p);
> > > +		return NULL;
> > > +	}
> > > +
> > > +	while (num_classes--) {
> > > +		int rmpp_version = 0;
> > > +		int mgmt = *mgmt_classes++;
> > > +		int agent;
> > > +
> > > +		if (mgmt == IB_SA_CLASS)
> > > +			rmpp_version = 1;
> > 
> > There are other classes which can use RMPP. How are they handled ?
> 
> This is copy & paste from madrpc_init().
> This problem is generic for libibmad and I think should be fixed
> separately 

You are right :-(

> (maybe in mad_register_port_client()).

Perhaps. We'll see.

> > > +		if (mgmt < 0 || mgmt >= MAX_CLASS ||
> > > +		    (agent = mad_register_port_client(port_id, mgmt,
> > > +						      rmpp_version)) < 0) {
> > > +			IBWARN("client_register for mgmt %d failed", mgmt);
> > > +			if(!errno)
> > > +				errno = EINVAL;
> > > +			umad_close_port(port_id);
> > > +  			free(p);
> > > +  			return NULL;
> > > +		}
> > > +		p->class_agents[mgmt] = agent;
> > > +	}
> > > +
> > > +	p->port_id = port_id;
> > > +	return p;
> > > +}
> > > +
> > > +void
> > > +mad_rpc_close_port(void *port_id)
> > > +{
> > > +	struct ibmad_port *p = port_id;
> > > +	umad_close_port(p->port_id);
> > > +	free(p);
> > > +}
> > > diff --git a/libibumad/src/umad.c b/libibumad/src/umad.c
> > > index a99fb5a..cb9eef6 100644
> > > --- a/libibumad/src/umad.c
> > > +++ b/libibumad/src/umad.c
> > > @@ -93,12 +93,14 @@ port_alloc(int portid, char *dev, int po
> > >  
> > >  	if (portid < 0 || portid >= UMAD_MAX_PORTS) {
> > >  		IBWARN("bad umad portid %d", portid);
> > > +		errno = EINVAL;
> > >  		return 0;
> > >  	}
> > >  
> > >  	if (port->dev_name[0]) {
> > >  		IBWARN("umad port id %d is already allocated for %s %d",
> > >  			portid, port->dev_name, port->dev_port);
> > > +		errno = EBUSY;
> > >  		return 0;
> > >  	}
> > >  
> > > @@ -567,7 +569,7 @@ umad_open_port(char *ca_name, int portnu
> > >  		return -EINVAL;
> > >  
> > >  	if (!(port = port_alloc(umad_id, ca_name, portnum)))
> > > -		return -EINVAL;
> > > +		return -errno;
> > >  
> > >  	snprintf(port->dev_file, sizeof port->dev_file - 1, "%s/umad%d",
> > >  		 UMAD_DEV_DIR , umad_id);
> > 
> > Is the umad.c change really a separate change from the rest ?
> 
> It was done in order to provide a meaningful errno value in case of
> mad_rpc_open_port() failure (not needed with madrpc_init() because it
> calls exit() if something is wrong) and this can be separated.
> 
> > If so,
> > this patch should be broken into two parts and that is the first part.
> 
> Agree.
> 
> > No need to resubmit for this.
> 
> Ok. And for the rest of changes?

Yes.

-- Hal

> Sasha
> 
> > 
> > -- Hal
> > 





More information about the general mailing list