[openib-general] [PATCH] opensm: libibmad: rpc API which supports more than one port.
Hal Rosenstock
halr at voltaire.com
Wed Aug 30 09:13:41 PDT 2006
Hi Sasha,
On Tue, 2006-08-29 at 21:29, Sasha Khapyorsky wrote:
> Hi Hal,
>
> On 20:09 Tue 29 Aug , Hal Rosenstock wrote:
> > Hi Sasha,
> >
> > On Fri, 2006-08-25 at 09:17, Sasha Khapyorsky wrote:
> > > This provides RPC like API which may work with several ports.
> >
> > I think you mean "can work" rather than "may work" :-)
>
> Yes.
>
> We will still have one limitation from libumad - it tracks ports that
> are already open. I'm not sure why (the same port can be opened from
> another process or by forking the current one). I think this may be
> the next improvement there.
OK.
> > > Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
> > > ---
> > >
> > > libibmad/include/infiniband/mad.h | 9 +++
> > > libibmad/src/libibmad.map | 4 +
> > > libibmad/src/register.c | 20 +++++--
> > > libibmad/src/rpc.c | 106 +++++++++++++++++++++++++++++++++++--
> > > libibumad/src/umad.c | 4 +
> >
> > ../doc/libibmad.txt should also be updated appropriately for the new
> > routines.
>
> Sure, I thought to stabilize this API first.
OK.
> > > 5 files changed, 130 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/libibmad/include/infiniband/mad.h b/libibmad/include/infiniband/mad.h
> > > index 45ff572..bd8a80b 100644
> > > --- a/libibmad/include/infiniband/mad.h
> > > +++ b/libibmad/include/infiniband/mad.h
> > > @@ -660,6 +660,7 @@ uint64_t mad_trid(void);
> > > int mad_build_pkt(void *umad, ib_rpc_t *rpc, ib_portid_t *dport, ib_rmpp_hdr_t *rmpp, void *data);
> > >
> > > /* register.c */
> > > +int mad_register_port_client(int port_id, int mgmt, uint8_t rmpp_version);
> > > int mad_register_client(int mgmt, uint8_t rmpp_version);
> > > int mad_register_server(int mgmt, uint8_t rmpp_version,
> > > uint32_t method_mask[4], uint32_t class_oui);
> > > @@ -704,6 +705,14 @@ void madrpc_lock(void);
> > > void madrpc_unlock(void);
> > > void madrpc_show_errors(int set);
> > >
> > > +void * mad_rpc_open_port(char *dev_name, int dev_port, int *mgmt_classes,
> > > + int num_classes);
> > > +void mad_rpc_close_port(void *ibmad_port);
> > > +void * mad_rpc(void *ibmad_port, ib_rpc_t *rpc, ib_portid_t *dport,
> > > + void *payload, void *rcvdata);
> > > +void * mad_rpc_rmpp(void *ibmad_port, ib_rpc_t *rpc, ib_portid_t *dport,
> > > + ib_rmpp_hdr_t *rmpp, void *data);
> > > +
> > > /* smp.c */
> > > uint8_t * smp_query(void *buf, ib_portid_t *id, uint attrid, uint mod,
> > > uint timeout);
> > > diff --git a/libibmad/src/libibmad.map b/libibmad/src/libibmad.map
> > > index bf81bd1..78b7ff0 100644
> > > --- a/libibmad/src/libibmad.map
> > > +++ b/libibmad/src/libibmad.map
> > > @@ -62,6 +62,10 @@ IBMAD_1.0 {
> >
> > This should be 1.1
>
> Ok.
>
> >
> > > ib_resolve_self;
> > > ib_resolve_smlid;
> > > ibdebug;
> > > + mad_rpc_open_port;
> > > + mad_rpc_close_port;
> > > + mad_rpc;
> > > + mad_rpc_rmpp;
> > > madrpc;
> > > madrpc_def_timeout;
> > > madrpc_init;
> >
> > What about mad_register_port_client ? Should that be included here ?
>
> It is not used externally - all registrations are done in _open(). So I
> don't see this as part of the new "API". Maybe if we decide to
> extend it later we will need to "export" this symbol.
OK.
> > > diff --git a/libibmad/src/register.c b/libibmad/src/register.c
> > > index 4f44625..52d6989 100644
> > > --- a/libibmad/src/register.c
> > > +++ b/libibmad/src/register.c
> > > @@ -43,6 +43,7 @@ #include <unistd.h>
> > > #include <pthread.h>
> > > #include <sys/time.h>
> > > #include <string.h>
> > > +#include <errno.h>
> > >
> > > #include <umad.h>
> > > #include "mad.h"
> > > @@ -118,7 +119,7 @@ mad_agent_class(int agent)
> > > }
> > >
> > > int
> > > -mad_register_client(int mgmt, uint8_t rmpp_version)
> > > +mad_register_port_client(int port_id, int mgmt, uint8_t rmpp_version)
> > > {
> > > int vers, agent;
> > >
> > > @@ -126,7 +127,7 @@ mad_register_client(int mgmt, uint8_t rm
> > > DEBUG("Unknown class %d mgmt_class", mgmt);
> > > return -1;
> > > }
> > > - if ((agent = umad_register(madrpc_portid(), mgmt,
> > > + if ((agent = umad_register(port_id, mgmt,
> > > vers, rmpp_version, 0)) < 0) {
> > > DEBUG("Can't register agent for class %d", mgmt);
> > > return -1;
> > > @@ -137,13 +138,22 @@ mad_register_client(int mgmt, uint8_t rm
> > > return -1;
> > > }
> > >
> > > - if (register_agent(agent, mgmt) < 0)
> > > - return -1;
> > > -
> > > return agent;
> > > }
> > >
> > > int
> > > +mad_register_client(int mgmt, uint8_t rmpp_version)
> > > +{
> > > + int agent;
> > > +
> > > + agent = mad_register_port_client(madrpc_portid(), mgmt, rmpp_version);
> > > + if (agent < 0)
> > > + return agent;
> > > +
> > > + return register_agent(agent, mgmt);
> > > +}
> > > +
> > > +int
> > > mad_register_server(int mgmt, uint8_t rmpp_version,
> > > uint32_t method_mask[4], uint32_t class_oui)
> > > {
> > > diff --git a/libibmad/src/rpc.c b/libibmad/src/rpc.c
> > > index b2d3e77..ac4f361 100644
> > > --- a/libibmad/src/rpc.c
> > > +++ b/libibmad/src/rpc.c
> > > @@ -48,6 +48,13 @@ #include <errno.h>
> > > #include <umad.h>
> > > #include "mad.h"
> > >
> > > +#define MAX_CLASS 256
> > > +
> > > +struct ibmad_port {
> > > + int port_id; /* file descriptor returned by umad_open() */
> > > + int class_agents[MAX_CLASS]; /* class2agent mapper */
> > > +};
> > > +
> > > int ibdebug;
> > >
> > > static int mad_portid = -1;
> > > @@ -105,7 +112,8 @@ madrpc_portid(void)
> > > }
> > >
> > > static int
> > > -_do_madrpc(void *sndbuf, void *rcvbuf, int agentid, int len, int timeout)
> > > +_do_madrpc(int port_id, void *sndbuf, void *rcvbuf, int agentid, int len,
> > > + int timeout)
> > > {
> > > uint32_t trid; /* only low 32 bits */
> > > int retries;
> > > @@ -133,7 +141,7 @@ _do_madrpc(void *sndbuf, void *rcvbuf, i
> > > }
> > >
> > > length = len;
> > > - if (umad_send(mad_portid, agentid, sndbuf, length, timeout, 0) < 0) {
> > > + if (umad_send(port_id, agentid, sndbuf, length, timeout, 0) < 0) {
> > > IBWARN("send failed; %m");
> > > return -1;
> > > }
> > > @@ -141,7 +149,7 @@ _do_madrpc(void *sndbuf, void *rcvbuf, i
> > > /* Use same timeout on receive side just in case */
> > > /* send packet is lost somewhere. */
> > > do {
> > > - if (umad_recv(mad_portid, rcvbuf, &length, timeout) < 0) {
> > > + if (umad_recv(port_id, rcvbuf, &length, timeout) < 0) {
> > > IBWARN("recv failed: %m");
> > > return -1;
> > > }
> > > @@ -164,8 +172,10 @@ _do_madrpc(void *sndbuf, void *rcvbuf, i
> > > }
> > >
> > > void *
> > > -madrpc(ib_rpc_t *rpc, ib_portid_t *dport, void *payload, void *rcvdata)
> > > +mad_rpc(void *port_id, ib_rpc_t *rpc, ib_portid_t *dport, void *payload,
> > > + void *rcvdata)
> > > {
> > > + struct ibmad_port *p = port_id;
> > > int status, len;
> > > uint8_t sndbuf[1024], rcvbuf[1024], *mad;
> > >
> > > @@ -175,7 +185,8 @@ madrpc(ib_rpc_t *rpc, ib_portid_t *dport
> > > if ((len = mad_build_pkt(sndbuf, rpc, dport, 0, payload)) < 0)
> > > return 0;
> > >
> > > - if ((len = _do_madrpc(sndbuf, rcvbuf, mad_class_agent(rpc->mgtclass),
> > > + if ((len = _do_madrpc(p->port_id, sndbuf, rcvbuf,
> > > + p->class_agents[rpc->mgtclass],
> > > len, rpc->timeout)) < 0)
> > > return 0;
> > >
> > > @@ -198,8 +209,10 @@ madrpc(ib_rpc_t *rpc, ib_portid_t *dport
> > > }
> > >
> > > void *
> > > -madrpc_rmpp(ib_rpc_t *rpc, ib_portid_t *dport, ib_rmpp_hdr_t *rmpp, void *data)
> > > +mad_rpc_rmpp(void *port_id, ib_rpc_t *rpc, ib_portid_t *dport,
> > > + ib_rmpp_hdr_t *rmpp, void *data)
> > > {
> > > + struct ibmad_port *p = port_id;
> > > int status, len;
> > > uint8_t sndbuf[1024], rcvbuf[1024], *mad;
> > >
> > > @@ -210,7 +223,8 @@ madrpc_rmpp(ib_rpc_t *rpc, ib_portid_t *
> > > if ((len = mad_build_pkt(sndbuf, rpc, dport, rmpp, data)) < 0)
> > > return 0;
> > >
> > > - if ((len = _do_madrpc(sndbuf, rcvbuf, mad_class_agent(rpc->mgtclass),
> > > + if ((len = _do_madrpc(p->port_id, sndbuf, rcvbuf,
> > > + p->class_agents[rpc->mgtclass],
> > > len, rpc->timeout)) < 0)
> > > return 0;
> > >
> > > @@ -249,6 +263,24 @@ madrpc_rmpp(ib_rpc_t *rpc, ib_portid_t *
> > > return data;
> > > }
> > >
> > > +void *
> > > +madrpc(ib_rpc_t *rpc, ib_portid_t *dport, void *payload, void *rcvdata)
> > > +{
> > > + struct ibmad_port port;
> > > + port.port_id = mad_portid;
> > > + port.class_agents[rpc->mgtclass] = mad_class_agent(rpc->mgtclass);
> > > + return mad_rpc(&port, rpc, dport, payload, rcvdata);
> > > +}
> > > +
> > > +void *
> > > +madrpc_rmpp(ib_rpc_t *rpc, ib_portid_t *dport, ib_rmpp_hdr_t *rmpp, void *data)
> > > +{
> > > + struct ibmad_port port;
> > > + port.port_id = mad_portid;
> > > + port.class_agents[rpc->mgtclass] = mad_class_agent(rpc->mgtclass);
> > > + return mad_rpc_rmpp(&port, rpc, dport, rmpp, data);
> > > +}
> > > +
> > > static pthread_mutex_t rpclock = PTHREAD_MUTEX_INITIALIZER;
> > >
> > > void
> > > @@ -282,3 +314,63 @@ madrpc_init(char *dev_name, int dev_port
> > > IBPANIC("client_register for mgmt %d failed", mgmt);
> > > }
> > > }
> > > +
> > > +void *
> > > +mad_rpc_open_port(char *dev_name, int dev_port,
> > > + int *mgmt_classes, int num_classes)
> > > +{
> > > + struct ibmad_port *p;
> > > + int port_id;
> >
> > Should there be some validation on num_classes < MAX_CLASS ?
>
> Such a check is cheap and can be performed (it was not done in
> madrpc_init()).
I guess that validation is needed in both places. I'll add it in a
follow-up to this.
> Without this the function will still "work" (i.e. fail), but in a longer
> way (it will fail to register an agent once MAX_CLASS is overflowed).
Won't it overwrite some structure (scribble on memory) ?
> > > + if (umad_init() < 0) {
> > > + IBWARN("can't init UMAD library");
> > > + errno = ENODEV;
> > > + return NULL;
> > > + }
> > > +
> > > + p = malloc(sizeof(*p));
> > > + if (!p) {
> > > + errno = ENOMEM;
> > > + return NULL;
> > > + }
> > > + memset(p, 0, sizeof(*p));
> > > +
> > > + if ((port_id = umad_open_port(dev_name, dev_port)) < 0) {
> > > + IBWARN("can't open UMAD port (%s:%d)", dev_name, dev_port);
> > > + if (!errno)
> > > + errno = EIO;
> > > + free(p);
> > > + return NULL;
> > > + }
> > > +
> > > + while (num_classes--) {
> > > + int rmpp_version = 0;
> > > + int mgmt = *mgmt_classes++;
> > > + int agent;
> > > +
> > > + if (mgmt == IB_SA_CLASS)
> > > + rmpp_version = 1;
> >
> > There are other classes which can use RMPP. How are they handled ?
>
> This is copy & paste from madrpc_init().
> This problem is generic for libibmad and I think should be fixed
> separately
You are right :-(
> (maybe in mad_register_port_client()).
Perhaps. We'll see.
> > > + if (mgmt < 0 || mgmt >= MAX_CLASS ||
> > > + (agent = mad_register_port_client(port_id, mgmt,
> > > + rmpp_version)) < 0) {
> > > + IBWARN("client_register for mgmt %d failed", mgmt);
> > > + if(!errno)
> > > + errno = EINVAL;
> > > + umad_close_port(port_id);
> > > + free(p);
> > > + return NULL;
> > > + }
> > > + p->class_agents[mgmt] = agent;
> > > + }
> > > +
> > > + p->port_id = port_id;
> > > + return p;
> > > +}
> > > +
> > > +void
> > > +mad_rpc_close_port(void *port_id)
> > > +{
> > > + struct ibmad_port *p = port_id;
> > > + umad_close_port(p->port_id);
> > > + free(p);
> > > +}
> > > diff --git a/libibumad/src/umad.c b/libibumad/src/umad.c
> > > index a99fb5a..cb9eef6 100644
> > > --- a/libibumad/src/umad.c
> > > +++ b/libibumad/src/umad.c
> > > @@ -93,12 +93,14 @@ port_alloc(int portid, char *dev, int po
> > >
> > > if (portid < 0 || portid >= UMAD_MAX_PORTS) {
> > > IBWARN("bad umad portid %d", portid);
> > > + errno = EINVAL;
> > > return 0;
> > > }
> > >
> > > if (port->dev_name[0]) {
> > > IBWARN("umad port id %d is already allocated for %s %d",
> > > portid, port->dev_name, port->dev_port);
> > > + errno = EBUSY;
> > > return 0;
> > > }
> > >
> > > @@ -567,7 +569,7 @@ umad_open_port(char *ca_name, int portnu
> > > return -EINVAL;
> > >
> > > if (!(port = port_alloc(umad_id, ca_name, portnum)))
> > > - return -EINVAL;
> > > + return -errno;
> > >
> > > snprintf(port->dev_file, sizeof port->dev_file - 1, "%s/umad%d",
> > > UMAD_DEV_DIR , umad_id);
> >
> > Is the umad.c change really a separate change from the rest ?
>
> It was done in order to provide a meaningful errno value in case of
> mad_rpc_open_port() failure (not needed with madrpc_init() because it
> does exit() if something is wrong) and this can be separated.
>
> > If so,
> > this patch should be broken into two parts and that is the first part.
>
> Agree.
>
> > No need to resubmit for this.
>
> Ok. And for the rest of changes?
Yes.
-- Hal
> Sasha
>
> >
> > -- Hal
> >
More information about the general
mailing list