[ofa-general] Re: [PATCH] ibsim: Support short RMPP packets

Hal Rosenstock hrosenstock at xsigo.com
Thu Jun 12 14:40:15 PDT 2008


On Thu, 2008-06-12 at 23:51 +0300, Sasha Khapyorsky wrote:
> On 06:53 Wed 11 Jun     , Hal Rosenstock wrote:
> > ibsim: Support for short RMPP packets (up to 256 bytes total)
> > 
> > If this is acceptable, I'll follow this up with long packet support.
> 
> RMPP support is acceptable and needed :). I would suggest to discuss
> design aspects. I had some experimental "RMPP support" (actually - "long
> packets support") for ibsim, for doing this workable I switched to
> stream sockets. Finally I saw bad performance numbers with huge queries.
> 
> Some comments about this patch are below.
> 
> [snip...]
> > diff --git a/ibsim/sim_mad.c b/ibsim/sim_mad.c
> > index 675d95b..eef0bab
> > --- a/ibsim/sim_mad.c
> > +++ b/ibsim/sim_mad.c
> > @@ -1,5 +1,6 @@
> >  /*
> >   * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
> > + * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> >   *
> >   * This file is part of ibsim.
> >   *
> > @@ -1128,14 +1129,14 @@ Smpfn *get_handle_fn(ib_rpc_t rpc, int response)
> >  		return fn;
> >  	}
> >  
> > -	return 0;		// No MGTCLASS matched .
> > +	return 0;		// No MGTCLASS matched.
> >  }
> >  
> >  int process_packet(Client * cl, void *p, int size, Client ** dcl)
> >  {
> >  	struct sim_request *r = p;
> >  	Port *port;
> > -	uint8_t data[256];
> > +	uint8_t data[MAD_BLOCK_SIZE];
> >  	int status, tlid, tqp;
> >  	int response;
> >  	Smpfn *fn;
> > @@ -1144,9 +1145,10 @@ int process_packet(Client * cl, void *p, int size, Client ** dcl)
> >  
> >  	*dcl = cl;
> >  
> > -	DEBUG("client %d, size %d", cl->id, size);
> > -	if (size != sizeof(*r)) {
> > -		IBWARN("bad packet size %d (!= %zu)", size, sizeof(*r));
> > +	DEBUG("client %d size %d", cl->id, size);
> > +	if (size > sizeof(*r) + MAD_BLOCK_SIZE) {
> 
> What about partial write?

This change was needed for short RMPP.

>  The socket is non-blocked datagram.

Is a framing layer needed ?

> > +		IBWARN("unsupported packet size %d (> %zu)", size,
> > +		       sizeof(*r) + MAD_BLOCK_SIZE);
> >  		return -1;
> >  	}
> >  
> > @@ -1183,7 +1185,7 @@ int process_packet(Client * cl, void *p, int size, Client ** dcl)
> >  		VERB("forward pkt to client %d pid %d attr %d",
> >  		     (*dcl)->id, (*dcl)->pid, rpc.attr.id);
> >  		forward_MAD(r->mad, &rpc, &path);
> > -		return sizeof(*r);	// forward only
> > +		return size;				// forward only
> >  	}
> >  
> >  	if (port->errrate && (random() % 100) < port->errrate) {
> > @@ -1214,12 +1216,12 @@ int process_packet(Client * cl, void *p, int size, Client ** dcl)
> >  		VERB("PKT roll back did not succeed");
> >  		goto _dropped;
> >  	}
> > -	return sizeof(*r);
> > +	return sizeof(*r) + MAD_BLOCK_SIZE;
> >  
> >    _dropped:
> >  	r->status = htonl(110);
> >  	*dcl = cl;
> > -	return sizeof(*r);
> > +	return sizeof(*r) + MAD_BLOCK_SIZE;
> >  }
> >  
> >  static int encode_trap128(Port * port, char *data)
> > @@ -1279,7 +1281,7 @@ static int encode_trap_header(char *buf)
> >  
> >  int send_trap(Port * port, int trapnum)
> >  {
> > -	struct sim_request req;
> > +	struct sim_req256 req;
> >  	Client *cl;
> >  	int ret, lid = port->lid;
> >  	char *data = req.mad + 64;	/* data offset */
> > diff --git a/include/ibsim.h b/include/ibsim.h
> > index 84568e6..14a3f90 100644
> > --- a/include/ibsim.h
> > +++ b/include/ibsim.h
> > @@ -1,5 +1,6 @@
> >  /*
> >   * Copyright (c) 2006,2007 Voltaire, Inc. All rights reserved.
> > + * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> >   *
> >   * This file is part of ibsim.
> >   *
> > @@ -61,6 +62,8 @@ struct sim_port {
> >  #define SIM_MAGIC	0xdeadbeef
> >  #define SIM_CTL_MAX_DATA	64
> >  
> > +#define MAD_BLOCK_SIZE		256
> > +
> >  struct sim_request {
> >  	uint32_t dlid;
> >  	uint32_t slid;
> > @@ -68,7 +71,17 @@ struct sim_request {
> >  	uint32_t sqp;
> >  	uint32_t status;
> >  	uint64_t context;
> > -	char mad[256];
> > +	char mad[0];
> > +};
> > +
> > +struct sim_req256 {
> > +	uint32_t dlid;
> > +	uint32_t slid;
> > +	uint32_t dqp;
> > +	uint32_t sqp;
> > +	uint32_t status;
> > +	uint64_t context;
> > +	char mad[MAD_BLOCK_SIZE];
> >  };
> 
> Why do we need two separate structures?

The second one was just to eliminate the memory allocation (and use the
stack). Only forwarded packets have the variability in size.

> >  enum SIM_CTL_TYPES {
> > diff --git a/umad2sim/umad2sim.c b/umad2sim/umad2sim.c
> > index 4cbf8da..9c69fb5 100644
> > --- a/umad2sim/umad2sim.c
> > +++ b/umad2sim/umad2sim.c
> > @@ -1,5 +1,6 @@
> >  /*
> >   * Copyright (c) 2006,2007 Voltaire, Inc. All rights reserved.
> > + * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> >   *
> >   * This file is part of ibsim.
> >   *
> > @@ -376,7 +377,7 @@ static int dev_sysfs_create(struct umad2sim_dev *dev)
> >  
> >  static ssize_t umad2sim_read(struct umad2sim_dev *dev, void *buf, size_t count)
> >  {
> > -	struct sim_request req;
> > +	struct sim_req256 req;
> >  	ib_user_mad_t *umad = (ib_user_mad_t *) buf;
> >  	unsigned mgmt_class;
> >  	int cnt;
> > @@ -385,11 +386,6 @@ static ssize_t umad2sim_read(struct umad2sim_dev *dev, void *buf, size_t count)
> >  
> >  	cnt = real_read(dev->sim_client.fd_pktin, &req, sizeof(req));
> >  	DEBUG("umad2sim_read: got %d...\n", cnt);
> > -	if (cnt < sizeof(req)) {
> > -		ERROR("umad2sim_read: partial request - skip.\n");
> > -		umad->status = EAGAIN;
> > -		return umad_size();
> > -	}
> >  
> >  	mgmt_class = mad_get_field(req.mad, 0, IB_MAD_MGMTCLASS_F);
> 
> This checked that we got at least req header before decode this.

No; I think it checked that the entire packet existed as req was the
entire packet not just the header.

> Why is it removed?

Because of short RMPP. It can add back at least the header check part
but I think the partial write (long writes) is the fundamental issue.

> > @@ -411,7 +407,7 @@ static ssize_t umad2sim_read(struct umad2sim_dev *dev, void *buf, size_t count)
> >  	umad->status = ntohl(req.status);
> >  	umad->timeout_ms = 0;
> >  	umad->retries = 0;
> > -	umad->length = umad_size() + sizeof(req.mad);
> > +	umad->length = umad_size() + cnt;
> 
> 'cnt' is normally actual MAD length + sim_request header size. Is it
> correct to put it as is in umad->length (shouldn't be umad_size() + cnt
> - sizeof(struct sim_request))?

Not sure about this. I'll need to get back on this.

> >  
> >  	umad->addr.qpn = req.sqp;
> >  	umad->addr.qkey = 0;	// agent->qkey;
> > @@ -431,9 +427,9 @@ static ssize_t umad2sim_read(struct umad2sim_dev *dev, void *buf, size_t count)
> >  static ssize_t umad2sim_write(struct umad2sim_dev *dev,
> >  			      const void *buf, size_t count)
> >  {
> > -	struct sim_request req;
> > +	struct sim_request *req;
> >  	ib_user_mad_t *umad = (ib_user_mad_t *) buf;
> > -	int cnt;
> > +	int cnt, ocnt;
> >  
> >  #ifdef SIMULATE_SEND_ERRORS
> >  	{ static int err_count;
> > @@ -464,25 +460,32 @@ static ssize_t umad2sim_write(struct umad2sim_dev *dev,
> >  	      mad_get_field(umad_get_mad(umad), 0, IB_MAD_ATTRMOD_F)
> >  	    );
> >  
> > -	req.dlid = umad->addr.lid;
> > -	req.slid = req.dlid == 0xffff ? 0xffff : 0;	/* 0 - means auto
> > +	cnt = count - umad_size();
> > +	if (cnt > MAD_BLOCK_SIZE)
> > +		cnt = MAD_BLOCK_SIZE;
> > +	req = malloc(sizeof(*req) + cnt);
> 
> So why malloc() if size is limited by struct sim_request anyway?

Because long RMPP isn't handled yet.

> What was a reason to change 'struct sim_request r' to 'struct sim_request
> *r'?

Because sim_request is now only the header and the memory needs to be
malloc'd.

It's to get ready for more flexible packet sizes (long RMPP) as only the
cnt limiting would be removed here. Also, short RMPP cnts are smaller
than MAD_BLOCK_SIZE.

-- Hal

> Sasha
> 
> > +	if (!req) {
> > +		ERROR("umad2sim_write: no mem for sim req: %m");
> > +		return -1;
> > +	}
> > +
> > +	req->dlid = umad->addr.lid;
> > +	req->slid = req->dlid == 0xffff ? 0xffff : 0;	/* 0 - means auto
> >  							   (supported by ibsim) */ ;
> > -	req.dqp = umad->addr.qpn;
> > -	req.sqp = htonl(dev->agents[umad->agent_id].qpn);
> > -	req.status = 0;
> > -	req.context = 0;
> > +	req->dqp = umad->addr.qpn;
> > +	req->sqp = htonl(dev->agents[umad->agent_id].qpn);
> > +	req->status = 0;
> > +	req->context = 0;
> >  
> > -	cnt = count - umad_size();
> > -	if (cnt > sizeof(req.mad))
> > -		cnt = sizeof(req.mad);
> > -	memcpy(req.mad, umad_get_mad(umad), cnt);
> > +	memcpy(req->mad, umad_get_mad(umad), cnt);
> >  
> > -	cnt = write(dev->sim_client.fd_pktout, (void *)&req, sizeof(req));
> > -	if (cnt < 0) {
> > +	ocnt = write(dev->sim_client.fd_pktout, req, sizeof(*req) + cnt);
> > +	free(req);
> > +	if (ocnt < 0) {
> >  		ERROR("umad2sim_write: cannot write\n");
> >  		return -1;
> >  	}
> > -	if (cnt < sizeof(req)) {
> > +	if (ocnt < sizeof(*req) + cnt) {
> >  		ERROR("umad2sim_write: partial write\n");
> >  	}
> >  
> > 
> > 
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
> 
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general




More information about the general mailing list